// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                          Morph                                            XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca

//------------------------------------------------------------------------
// fgMorphCastIntoHelper: Turn a GT_CAST node into a call to a conversion helper.
//
// Arguments:
//    tree   - the cast node; it is rewritten in place into the helper call
//    helper - the helper to call
//    oper   - the operand of the cast; it becomes the only call argument
//
// Return Value:
//    The morphed tree. When 'oper' is a constant, folding is attempted first;
//    if folding produced something, that result is morphed and returned
//    instead of a helper call.
//
GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
{
    if (oper->OperIsConst())
    {
        // Folding is attempted but is allowed to leave the cast alone (NaN ...)
        GenTreePtr const castBeforeFold = tree;
        tree                            = gtFoldExprConst(tree);

        // A different node came back: morph whatever the folder produced.
        if (tree != castBeforeFold)
        {
            return fgMorphTree(tree);
        }

        // Same node, but it now is a constant.
        if ((tree->OperKind() & GTK_CONST) != 0)
        {
            return fgMorphConst(tree);
        }

        // Nothing was folded: the node must still be a GT_CAST of the same operand.
        noway_assert(tree->gtCast.CastOp() == oper);
        noway_assert(tree->gtOper == GT_CAST);
    }

    // The node is converted in place, so the helper call must be 'tree' itself.
    GenTree* const helperCall = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
    assert(helperCall == tree);
    return helperCall;
}

/*****************************************************************************
 *
 *  Rewrite 'tree' in place as a GT_CALL to the helper identified by 'helper',
 *  using 'args' (which may be null) as the argument list, and then morph the
 *  call arguments. Returns the result of fgMorphArgs on the rewritten node.
 */

GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
{
    // The helper call ought to be semantically equivalent to the original node, so preserve its VN.
    tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);

    // The node is a call now; it also inherits the side effects of its arguments.
    tree->gtFlags |= GTF_CALL;
    if (args != nullptr)
    {
        tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
    }

    // Fill in the call-specific fields: a plain helper call with no 'this',
    // no late args and no arg info yet.
    GenTreeCall& call          = tree->gtCall;
    call.gtCallType            = CT_HELPER;
    call.gtCallMethHnd         = eeFindHelper(helper);
    call.gtCallArgs            = args;
    call.gtCallObjp            = nullptr;
    call.gtCallLateArgs        = nullptr;
    call.fgArgInfo             = nullptr;
    call.gtRetClsHnd           = nullptr;
    call.gtCallMoreFlags       = 0;
    call.gtInlineCandidateInfo = nullptr;
    call.gtControlExpr         = nullptr;

#ifdef LEGACY_BACKEND
    call.gtCallRegUsedMask = RBM_NONE;
#endif // LEGACY_BACKEND

#if DEBUG
    // Helper calls are never candidates.

    call.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
#endif // DEBUG

#ifdef FEATURE_READYTORUN_COMPILER
    call.gtEntryPoint.addr = nullptr;
#endif

#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
    // A long-typed call on these targets needs its return type descriptor
    // re-initialized for a long return.
    if (varTypeIsLong(tree))
    {
        GenTreeCall*    longCall = tree->AsCall();
        ReturnTypeDesc* retDesc  = longCall->GetReturnTypeDesc();
        retDesc->Reset();
        retDesc->InitializeLongReturnType(this);
        longCall->ClearOtherRegs();
    }
#endif // _TARGET_XXX_

    /* Perform the morphing */

    return fgMorphArgs(tree->AsCall());
}

/*****************************************************************************
 *
 *  Report whether a relop has to be morphed into a qmark so that its boolean
 *  value can be materialized. Only the legacy backend ever needs this (for
 *  relops whose type is long or floating point); otherwise the answer is
 *  always 'false'.
 */
bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
{
#ifdef LEGACY_BACKEND
    const var_types relopType = tree->TypeGet();
    if (genActualType(relopType) == TYP_LONG)
    {
        return true;
    }
    return varTypeIsFloating(relopType);
#else  // !LEGACY_BACKEND
    return false;
#endif // !LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Morph a cast node (we perform some very simple transformations here).
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
//------------------------------------------------------------------------
// fgMorphCast: Morph a GT_CAST node.
//
// Arguments:
//    tree - the GT_CAST node to morph
//
// Return Value:
//    The morphed tree. Depending on the path taken this is the (updated) cast
//    node, a helper call the cast was rewritten into, a replacement tree built
//    around the operand, or the operand itself when the cast is redundant.
//
// Notes:
//    The work is done in three stages:
//    1. Target-specific legalization: casts that cannot be done in one step
//       get an intermediate cast inserted or are turned into helper calls.
//    2. Overflow-insensitive [u]long -> [u]int casts are pushed down into the
//       operand when the low 32 bits of the result depend only on the low
//       32 bits of the operands.
//    3. OPTIMIZECAST: the operand is morphed, the cast's flags are recomputed,
//       and the cast is dropped or constant-folded when possible.
//
GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_CAST);
    noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));

    /* The first sub-operand is the thing being cast */

    GenTreePtr oper = tree->gtCast.CastOp();

    if (fgGlobalMorph && (oper->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'oper' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs will change its type, and we want the cast
        // morphing code to see that type.
        fgMorphImplicitByRefArgs(oper);
    }

    var_types srcType = genActualType(oper->TypeGet());
    unsigned  srcSize;

    var_types dstType = tree->CastToType();
    unsigned  dstSize = genTypeSize(dstType);

    // See if the cast has to be done in two steps.  R -> I
    if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
    {
        // Only x86 must go through TYP_DOUBLE to get to all
        // integral types everybody else can get straight there
        // except for when using helpers
        if (srcType == TYP_FLOAT
#if !FEATURE_STACK_FP_X87

#if defined(_TARGET_ARM64_)
            // Arm64: src = float, dst is overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && tree->gtOverflow()
#elif defined(_TARGET_AMD64_)
            // Amd64: src = float, dst = uint64 or overflow conversion.
            // This goes through helper and hence src needs to be converted to double.
            && (tree->gtOverflow() || (dstType == TYP_ULONG))
#elif defined(_TARGET_ARM_)
            // Arm: src = float, dst = int64/uint64 or overflow conversion.
            && (tree->gtOverflow() || varTypeIsLong(dstType))
#endif

#endif // FEATURE_STACK_FP_X87
                )
        {
            oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
        }

        // do we need to do it in two steps R -> I, '-> smallType
        CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
        if (dstSize < genTypeSize(TYP_INT))
        {
            oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
            oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
#else
        if (dstSize < sizeof(void*))
        {
            oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
        }
#endif
        else
        {
            /* Note that if we need to use a helper call then we can not morph oper */
            if (!tree->gtOverflow())
            {
#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
                goto OPTIMIZECAST;
#else
                switch (dstType)
                {
                    case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
#ifdef LEGACY_BACKEND
                        // the RyuJIT backend does not use the x87 FPU and therefore
                        // does not support folding the cast conv.i4(round.d(d))
                        if ((oper->gtOper == GT_INTRINSIC) &&
                            (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
                        {
                            /* optimization: conv.i4(round.d(d)) -> round.i(d) */
                            oper->gtType = dstType;
                            return fgMorphTree(oper);
                        }
                        // if SSE2 is not enabled, we need the helper
                        else
#endif // LEGACY_BACKEND
                            if (!opts.compCanUseSSE2)
                        {
                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                        }
                        else
#endif // _TARGET_X86_
                        {
                            goto OPTIMIZECAST;
                        }
#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
                    case TYP_UINT:
                        goto OPTIMIZECAST;
#else  // _TARGET_ARM_
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
#endif // _TARGET_ARM_

#ifdef _TARGET_AMD64_
                    // SSE2 has instructions to convert a float/double directly to a long
                    case TYP_LONG:
                        goto OPTIMIZECAST;
#else
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
#endif //_TARGET_AMD64_
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
                    default:
                        break;
                }
#endif // _TARGET_ARM64_
            }
            else
            {
                // Overflow-checking float -> integer conversions always go through a helper.
                switch (dstType)
                {
                    case TYP_INT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
                    case TYP_UINT:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
                    case TYP_LONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
                    case TYP_ULONG:
                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
                    default:
                        break;
                }
            }
            noway_assert(!"Unexpected dstType");
        }
    }
#ifndef _TARGET_64BIT_
    // The code generation phase (for x86 & ARM32) does not handle casts
    // directly from [u]long to anything other than [u]int. Insert an
    // intermediate cast to native int.
    else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
    {
        oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
        oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
        tree->gtFlags &= ~GTF_UNSIGNED;
    }
#endif //!_TARGET_64BIT_

#ifdef _TARGET_ARM_
    else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
             !varTypeIsLong(oper->gtCast.CastOp()))
    {
        // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
        // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
        // This happens semi-frequently because there is no IL 'conv.r4.un'
        oper->gtType       = TYP_FLOAT;
        oper->CastToType() = TYP_FLOAT;
        return fgMorphTree(oper);
    }
    // converts long/ulong --> float/double casts into helper calls.
    else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
    {
        if (dstType == TYP_FLOAT)
        {
            // there is only a double helper, so we
            // - change the dsttype to double
            // - insert a cast from double to float
            // - recurse into the resulting tree
            tree->CastToType() = TYP_DOUBLE;
            tree->gtType       = TYP_DOUBLE;

            tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);

            return fgMorphTree(tree);
        }
        if (tree->gtFlags & GTF_UNSIGNED)
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif //_TARGET_ARM_

#ifdef _TARGET_AMD64_
    // Do we have to do two step U4/8 -> R4/8 ?
    // Codegen supports the following conversion as one-step operation
    // a) Long -> R4/R8
    // b) U8 -> R8
    //
    // The following conversions are performed as two-step operations using above.
    // U4 -> R4/8 = U4-> Long -> R4/8
    // U8 -> R4   = U8 -> R8 -> R4
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            if (dstType == TYP_FLOAT)
            {
                // Codegen can handle U8 -> R8 conversion.
                // U8 -> R4 =  U8 -> R8 -> R4
                // - change the dsttype to double
                // - insert a cast from double to float
                // - recurse into the resulting tree
                tree->CastToType() = TYP_DOUBLE;
                tree->gtType       = TYP_DOUBLE;
                tree               = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
                return fgMorphTree(tree);
            }
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
        }
    }
#endif // _TARGET_AMD64_

#ifdef _TARGET_X86_
    // Do we have to do two step U4/8 -> R4/8 ?
    else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
    {
        srcType = genUnsignedType(srcType);

        if (srcType == TYP_ULONG)
        {
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
        }
        else if (srcType == TYP_UINT)
        {
            oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
            oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
            tree->gtFlags &= ~GTF_UNSIGNED;
#ifndef LEGACY_BACKEND
            return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
#endif
        }
    }
#ifndef LEGACY_BACKEND
    else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
    {
        return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
    }
#endif
#endif //_TARGET_X86_
    else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
    {
        // We are casting away GC information.  we would like to just
        // change the type to int, however this gives the emitter fits because
        // it believes the variable is a GC variable at the beginning of the
        // instruction group, but is not turned non-gc by the code generator
        // we fix this by copying the GC pointer to a non-gc pointer temp.
        noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");

        // We generate an assignment to an int and then do the cast from an int. With this we avoid
        // the gc problem and we allow casts to bytes, longs,  etc...
        unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
        // The operand is retyped to native int only while the assignment is created,
        // then its original type is restored.
        oper->gtType    = TYP_I_IMPL;
        GenTreePtr asg  = gtNewTempAssign(lclNum, oper);
        oper->gtType    = srcType;

        // do the real cast
        GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);

        // Generate the comma tree
        oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);

        return fgMorphTree(oper);
    }

    // Look for narrowing casts ([u]long -> [u]int) and try to push them
    // down into the operand before morphing it.
    //
    // It doesn't matter if this is cast is from ulong or long (i.e. if
    // GTF_UNSIGNED is set) because the transformation is only applied to
    // overflow-insensitive narrowing casts, which always silently truncate.
    //
    // Note that casts from [u]long to small integer types are handled above.
    if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
    {
        // As a special case, look for overflow-sensitive casts of an AND
        // expression, and see if the second operand is a small constant. Since
        // the result of an AND is bound by its smaller operand, it may be
        // possible to prove that the cast won't overflow, which will in turn
        // allow the cast's operand to be transformed.
        if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
        {
            GenTreePtr andOp2 = oper->gtOp.gtOp2;

            // Special case to the special case: AND with a casted int.
            if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
            {
                // gtFoldExprConst will deal with whether the cast is signed or
                // unsigned, or overflow-sensitive.
                andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
            }

            // Look for a constant less than 2^{32} for a cast to uint, or less
            // than 2^{31} for a cast to int.
            int maxWidth = (dstType == TYP_UINT) ? 32 : 31;

            if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
            {
                // This cast can't overflow.
                tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
            }
        }

        // Only apply this transformation during global morph,
        // when neither the cast node nor the oper node may throw an exception
        // based on the upper 32 bits.
        //
        if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
        {
            // For these operations the lower 32 bits of the result only depends
            // upon the lower 32 bits of the operands
            //
            bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);

            // A left shift only has that property while the shift amount is below 32.
            // A 64-bit shift by 32..63 leaves zero in the low 32 bits, whereas a 32-bit
            // shift only looks at the low 5 bits of the count:
            //
            //   CAST(INT, LSH(1L, 32)) == 0
            //   LSH(CAST(INT, 1L), CAST(INT, 32)) == LSH(1, 0) == 1
            //
            // So the cast is pushed through GT_LSH only when the shift amount is a
            // constant known to be in the range [0, 31].
            if (oper->OperGet() == GT_LSH)
            {
                GenTreePtr shiftAmount = oper->gtOp.gtOp2;

                canPushCast = (shiftAmount->OperGet() == GT_CNS_INT) && (shiftAmount->gtIntCon.gtIconVal >= 0) &&
                              (shiftAmount->gtIntCon.gtIconVal < 32);
            }

            if (canPushCast)
            {
                DEBUG_DESTROY_NODE(tree);

                // Insert narrowing casts for op1 and op2
                oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
                if (oper->gtOp.gtOp2 != nullptr)
                {
                    oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
                }

                // Clear the GT_MUL_64RSLT if it is set
                if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
                {
                    oper->gtFlags &= ~GTF_MUL_64RSLT;
                }

                // The operation now produces a 32-bit result.
                oper->gtType = TYP_INT;

                // Remorph the new tree as the casts that we added may be folded away.
                return fgMorphTree(oper);
            }
        }
    }

OPTIMIZECAST:
    noway_assert(tree->gtOper == GT_CAST);

    /* Morph the operand */
    tree->gtCast.CastOp() = oper = fgMorphTree(oper);

    /* Reset the call flag */
    tree->gtFlags &= ~GTF_CALL;

    /* unless we have an overflow cast, reset the except flag */
    if (!tree->gtOverflow())
    {
        tree->gtFlags &= ~GTF_EXCEPT;
    }

    /* Just in case new side effects were introduced */
    tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);

    // From here on srcType is the type of the morphed operand (no longer its 'actual' type).
    srcType = oper->TypeGet();

    /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    srcSize = genTypeSize(srcType);

    if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
    {
        /* See if we can discard the cast */
        if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
        {
            if (srcType == dstType)
            { // Certainly if they are identical it is pointless
                goto REMOVE_CAST;
            }

            // A small-typed local of the destination type that is normalized on store
            // does not need the cast either.
            if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
            {
                unsigned   varNum = oper->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[varNum];
                if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
                {
                    goto REMOVE_CAST;
                }
            }

            bool unsignedSrc = varTypeIsUnsigned(srcType);
            bool unsignedDst = varTypeIsUnsigned(dstType);
            bool signsDiffer = (unsignedSrc != unsignedDst);

            // For same sized casts with
            //    the same signs or non-overflow cast we discard them as well
            if (srcSize == dstSize)
            {
                /* This should have been handled above */
                noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));

                if (!signsDiffer)
                {
                    goto REMOVE_CAST;
                }

                if (!tree->gtOverflow())
                {
                    /* For small type casts, when necessary we force
                       the src operand to the dstType and allow the
                       implied load from memory to perform the casting */
                    if (varTypeIsSmall(srcType))
                    {
                        switch (oper->gtOper)
                        {
                            case GT_IND:
                            case GT_CLS_VAR:
                            case GT_LCL_FLD:
                            case GT_ARR_ELEM:
                                oper->gtType = dstType;
                                goto REMOVE_CAST;
                            default:
                                break;
                        }
                    }
                    else
                    {
                        goto REMOVE_CAST;
                    }
                }
            }

            if (srcSize < dstSize) // widening cast
            {
                // Keep any long casts
                if (dstSize == sizeof(int))
                {
                    // Only keep signed to unsigned widening cast with overflow check
                    if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
                    {
                        goto REMOVE_CAST;
                    }
                }

                // Only a signed -> unsigned widening cast can overflow;
                // every other widening cast cannot, so drop the overflow check.

                if (unsignedSrc || !unsignedDst)
                {
                    tree->gtFlags &= ~GTF_OVERFLOW;
                }
            }
            else
            {
                // Try to narrow the operand of the cast and discard the cast
                // Note: Do not narrow a cast that is marked as a CSE
                // And do not narrow if the oper is marked as a CSE either
                //
                // optNarrowTree is first called with 'false' as its last argument to ask
                // whether narrowing is possible, and then with 'true' to actually do it.
                if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
                {
                    optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);

                    /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
                    if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
                    {
                        oper = oper->gtCast.CastOp();
                    }
                    goto REMOVE_CAST;
                }
            }
        }

        switch (oper->gtOper)
        {
            /* If the operand is a constant, we'll fold it */
            case GT_CNS_INT:
            case GT_CNS_LNG:
            case GT_CNS_DBL:
            case GT_CNS_STR:
            {
                GenTreePtr oldTree = tree;

                tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)

                // Did we get a comma throw as a result of gtFoldExprConst?
                if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
                {
                    noway_assert(fgIsCommaThrow(tree));
                    tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                    fgMorphTreeDone(tree);
                    return tree;
                }
                else if (tree->gtOper != GT_CAST)
                {
                    return tree;
                }

                noway_assert(tree->gtCast.CastOp() == oper); // unchanged
            }
            break;

            case GT_CAST:
                /* Check for two consecutive casts into the same dstType */
                if (!tree->gtOverflow())
                {
                    var_types dstType2 = oper->CastToType();
                    if (dstType == dstType2)
                    {
                        goto REMOVE_CAST;
                    }
                }
                break;

#ifdef LEGACY_BACKEND

            /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
               so that the code generator will know not to convert the result
               of the idiv to a regpair */
            case GT_MOD:
                if (dstType == TYP_INT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }

                break;
            case GT_UMOD:
                if (dstType == TYP_UINT)
                {
                    tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
                }
                break;

#endif // LEGACY_BACKEND

            case GT_COMMA:
                // Check for cast of a GT_COMMA with a throw overflow
                // Bug 110829: Since this optimization will bash the types
                // neither oper or commaOp2 can be CSE candidates
                if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
                {
                    GenTreePtr commaOp2 = oper->gtOp.gtOp2;

                    if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
                    {
                        // need type of oper to be same as tree
                        if (tree->gtType == TYP_LONG)
                        {
                            commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                            commaOp2->gtIntConCommon.SetLngValue(0);
                            /* Change the types of oper and commaOp2 to TYP_LONG */
                            oper->gtType = commaOp2->gtType = TYP_LONG;
                        }
                        else if (varTypeIsFloating(tree->gtType))
                        {
                            commaOp2->ChangeOperConst(GT_CNS_DBL);
                            commaOp2->gtDblCon.gtDconVal = 0.0;
                            // Change the types of oper and commaOp2
                            // X87 promotes everything to TYP_DOUBLE
                            // But other's are a little more precise
                            const var_types newTyp
#if FEATURE_X87_DOUBLES
                                = TYP_DOUBLE;
#else  // FEATURE_X87_DOUBLES
                                = tree->gtType;
#endif // FEATURE_X87_DOUBLES
                            oper->gtType = commaOp2->gtType = newTyp;
                        }
                        else
                        {
                            commaOp2->ChangeOperConst(GT_CNS_INT);
                            commaOp2->gtIntCon.gtIconVal = 0;
                            /* Change the types of oper and commaOp2 to TYP_INT */
                            oper->gtType = commaOp2->gtType = TYP_INT;
                        }
                    }

                    if (vnStore != nullptr)
                    {
                        fgValueNumberTreeConst(commaOp2);
                    }

                    /* Return the GT_COMMA node as the new tree */
                    return oper;
                }
                break;

            default:
                break;
        } /* end switch (oper->gtOper) */
    }

    // The cast survives. If it checks for overflow, register the overflow throw
    // code reference for the current block.
    if (tree->gtOverflow())
    {
        fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
    }

    return tree;

REMOVE_CAST:

    /* Here we've eliminated the cast, so just return its operand */
    assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate

    DEBUG_DESTROY_NODE(tree);
    return oper;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************
 *
 *  Build the tree that unwraps a proxy 'this' reference: two chained,
 *  invariant indirections starting from 'objRef'. Returns the tree that
 *  yields the unwrapped ('real this') reference.
 */

GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
{
    assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));

    CORINFO_EE_INFO* const eeInfo = eeGetEEInfo();

    // The unwrap takes two extra indirections. Both are typed as GC references
    // and are marked 'invariant' so that the CSE logic can hoist them when
    // appropriate.

    // First hop: load the reference stored at objRef + offsetOfTransparentProxyRP.
    GenTreePtr rpOffset = gtNewIconNode(eeInfo->offsetOfTransparentProxyRP, TYP_I_IMPL);
    GenTreePtr rpAddr   = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, rpOffset);
    GenTreePtr rpRef    = gtNewOperNode(GT_IND, TYP_REF, rpAddr);
    rpRef->gtFlags |= GTF_IND_INVARIANT;

    // Second hop: load the reference stored at rpRef + offsetOfRealProxyServer.
    GenTreePtr serverOffset = gtNewIconNode(eeInfo->offsetOfRealProxyServer, TYP_I_IMPL);
    GenTreePtr serverAddr   = gtNewOperNode(GT_ADD, TYP_I_IMPL, rpRef, serverOffset);
    GenTreePtr realThis     = gtNewOperNode(GT_IND, TYP_REF, serverAddr);
    realThis->gtFlags |= GTF_IND_INVARIANT;

    // 'realThis' now holds the 'real this' reference (i.e. the unwrapped proxy)
    return realThis;
}

/*****************************************************************************
 *
 *  Morph an argument list; compute the pointer argument count in the process.
 *
 *  NOTE: This function can be called from any place in the JIT to perform re-morphing
 *  due to graph altering modifications such as copy / constant propagation
 */

//------------------------------------------------------------------------
// UpdateGT_LISTFlags: Recompute the side-effect flags of a GT_LIST chain.
//
// Arguments:
//    tree - a GT_LIST node; gtOp1 is its element, gtOp2 the rest of the list
//           (or null at the end)
//
// Return Value:
//    The complete gtFlags of 'tree' after the update.
//
// Notes:
//    The rest of the list is processed first (recursively), so every node
//    ends up with the effect flags of its own element and of all the
//    elements that follow it.
//    NOTE(review): the recursive result is the full gtFlags word of the next
//    list node, not just its GTF_ALL_EFFECT bits, so any other flag set on a
//    later list node is OR-ed into the earlier ones too. Confirm this is
//    intended.
//
unsigned UpdateGT_LISTFlags(GenTreePtr tree)
{
    assert(tree->gtOper == GT_LIST);

    GenTreePtr const restOfList = tree->gtOp.gtOp2;

    // Start from whatever the tail of the list reports, if there is a tail.
    unsigned summary = (restOfList != nullptr) ? UpdateGT_LISTFlags(restOfList) : 0;

    // Add the effects of this node's own element.
    summary |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

    // Replace the node's old effect flags with the freshly computed ones.
    tree->gtFlags &= ~GTF_ALL_EFFECT;
    tree->gtFlags |= summary;

    return tree->gtFlags;
}

#ifdef DEBUG
void fgArgTabEntry::Dump()
{
    printf("fgArgTabEntry[arg %u", argNum);
    if (regNum != REG_STK)
    {
        printf(", %s, regs=%u", getRegName(regNum), numRegs);
    }
    if (numSlots > 0)
    {
        printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
    }
    printf(", align=%u", alignment);
    if (lateArgInx != (unsigned)-1)
    {
        printf(", lateArgInx=%u", lateArgInx);
    }
    if (isSplit)
    {
        printf(", isSplit");
    }
    if (needTmp)
    {
        printf(", tmpNum=V%02u", tmpNum);
    }
    if (needPlace)
    {
        printf(", needPlace");
    }
    if (isTmp)
    {
        printf(", isTmp");
    }
    if (processed)
    {
        printf(", processed");
    }
    if (isHfaRegArg)
    {
        printf(", isHfa");
    }
    if (isBackFilled)
    {
        printf(", isBackFilled");
    }
    if (isNonStandard)
    {
        printf(", isNonStandard");
    }
    printf("]\n");
}
#endif

// Construct an empty argument-info table for 'call' with room for 'numArgs'
// entries. No entries are filled in yet (argCount starts at zero), and the
// table slots themselves are not initialized here.
fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
    // Who owns this table.
    compiler = comp;
    callTree = call;

    // Stack bookkeeping starts from a clean state.
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = 0;
#if defined(UNIX_X86_ABI)
    alignmentDone = false;
    stkSizeBytes  = 0;
    padStkAlign   = 0;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = 0;
#endif

    // Nothing is known about the arguments yet.
    hasRegArgs   = false;
    hasStackArgs = false;
    argsComplete = false;
    argsSorted   = false;

    // The table: 'argTableSize' is the allocated size, 'argCount' the number of
    // entries filled in so far. No storage is allocated for an empty table.
    argCount     = 0;
    argTableSize = numArgs;
    argTable =
        (argTableSize == 0) ? nullptr : new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
}

/*****************************************************************************
 *
 *  fgArgInfo Copy Constructor
 *
 *  This method needs to act like a copy constructor for fgArgInfo.
 *  The newCall needs to have its fgArgInfo initialized such that
 *  we have newCall that is an exact copy of the oldCall.
 *  We have to take care since the argument information
 *  in the argTable contains pointers that must point to the
 *  new arguments and not the old arguments.
 *
 *  Arguments:
 *     newCall - the call whose argument table is being built; its argument
 *               lists are walked in lock step with those of oldCall
 *     oldCall - the call to copy from; its fgArgInfo must already have
 *               argsComplete set (asserted)
 *
 *  The copy is done in two passes:
 *     1. Walk the (optional) 'this' argument and gtCallArgs of both calls,
 *        cloning each old table entry whose "parent" matches and retargeting
 *        its "parent"/"node" pointers at the new call's trees.
 *     2. If any old entry's "node" did not point at its early argument
 *        (i.e. it points into gtCallLateArgs), walk gtCallLateArgs of both
 *        calls and patch the remaining "node" pointers.
 */
fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
    fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;

    compiler    = oldArgInfo->compiler;
    callTree    = newCall;
    argCount    = 0; // filled in arg count, starts at zero
    nextSlotNum = INIT_ARG_STACK_SLOT;
    stkLevel    = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
    alignmentDone = oldArgInfo->alignmentDone;
    stkSizeBytes  = oldArgInfo->stkSizeBytes;
    padStkAlign   = oldArgInfo->padStkAlign;
#endif
#if FEATURE_FIXED_OUT_ARGS
    outArgSize = oldArgInfo->outArgSize;
#endif
    argTableSize = oldArgInfo->argTableSize;
    argsComplete = false;
    argTable     = nullptr;
    if (argTableSize > 0)
    {
        // Start with an all-null table so that the second pass (and the asserts)
        // can tell which slots have been populated.
        argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            argTable[inx] = nullptr;
        }
    }

    assert(oldArgInfo->argsComplete);

    // We create local, artificial GenTreeArgLists that includes the gtCallObjp, if that exists, as first argument,
    // so we can iterate over these argument lists more uniformly.
    // Need to provide a temporary non-null first arguments to these constructors: if we use them, we'll replace them
    GenTreeArgList* newArgs;
    GenTreeArgList  newArgObjp(newCall, newCall->gtCallArgs);
    GenTreeArgList* oldArgs;
    GenTreeArgList  oldArgObjp(oldCall, oldCall->gtCallArgs);

    if (newCall->gtCallObjp == nullptr)
    {
        assert(oldCall->gtCallObjp == nullptr);
        newArgs = newCall->gtCallArgs;
        oldArgs = oldCall->gtCallArgs;
    }
    else
    {
        assert(oldCall->gtCallObjp != nullptr);

        // Each artificial head node must hold its own call's 'this' tree, so that the
        // first iteration below pairs oldCall's 'this' with newCall's 'this'.
        newArgObjp.Current() = newCall->gtCallObjp;
        newArgs              = &newArgObjp;
        oldArgObjp.Current() = oldCall->gtCallObjp;
        oldArgs              = &oldArgObjp;
    }

    GenTreePtr        newCurr;
    GenTreePtr        oldCurr;
    GenTreeArgList*   newParent   = nullptr;
    GenTreeArgList*   oldParent   = nullptr;
    fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
    bool              scanRegArgs = false;

    // Pass 1: walk the early argument lists of both calls in lock step.
    while (newArgs)
    {
        /* Get hold of the next argument values for the oldCall and newCall */

        newCurr = newArgs->Current();
        oldCurr = oldArgs->Current();
        if (newArgs != &newArgObjp)
        {
            newParent = newArgs;
            oldParent = oldArgs;
        }
        else
        {
            // The artificial 'this' node is always visited first and has no parent list node.
            assert(newParent == nullptr && oldParent == nullptr);
        }
        newArgs = newArgs->Rest();
        oldArgs = oldArgs->Rest();

        fgArgTabEntryPtr oldArgTabEntry = nullptr;
        fgArgTabEntryPtr newArgTabEntry = nullptr;

        for (unsigned inx = 0; inx < argTableSize; inx++)
        {
            oldArgTabEntry = oldArgTable[inx];

            if (oldArgTabEntry->parent == oldParent)
            {
                assert((oldParent == nullptr) == (newParent == nullptr));

                // We have found the matching "parent" field in oldArgTabEntry

                newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

                // First block copy all fields
                //
                *newArgTabEntry = *oldArgTabEntry;

                // Then update all GenTreePtr fields in the newArgTabEntry
                //
                newArgTabEntry->parent = newParent;

                // The node field is likely to have been updated
                //  to point at a node in the gtCallLateArgs list
                //
                if (oldArgTabEntry->node == oldCurr)
                {
                    // node is not pointing into the gtCallLateArgs list
                    newArgTabEntry->node = newCurr;
                }
                else
                {
                    // node must be pointing into the gtCallLateArgs list
                    //
                    // We will fix this pointer up in the next loop
                    //
                    newArgTabEntry->node = nullptr; // For now we assign a NULL to this field

                    scanRegArgs = true;
                }

                // Now initialize the proper element in the argTable array
                // (the new entry keeps the same table position as the old one)
                //
                argTable[inx] = newArgTabEntry;
                break;
            }
        }
        // We should have found the matching oldArgTabEntry and created the newArgTabEntry
        //
        assert(newArgTabEntry != nullptr);
    }

    // Pass 2: fix up the "node" fields that were left null above by pairing up
    // the late argument lists of the two calls.
    if (scanRegArgs)
    {
        newArgs = newCall->gtCallLateArgs;
        oldArgs = oldCall->gtCallLateArgs;

        while (newArgs)
        {
            /* Get hold of the next argument values for the oldCall and newCall */

            assert(newArgs->OperIsList());

            newCurr = newArgs->Current();
            newArgs = newArgs->Rest();

            assert(oldArgs->OperIsList());

            oldCurr = oldArgs->Current();
            oldArgs = oldArgs->Rest();

            fgArgTabEntryPtr oldArgTabEntry = nullptr;
            fgArgTabEntryPtr newArgTabEntry = nullptr;

            for (unsigned inx = 0; inx < argTableSize; inx++)
            {
                oldArgTabEntry = oldArgTable[inx];

                if (oldArgTabEntry->node == oldCurr)
                {
                    // We have found the matching "node" field in oldArgTabEntry

                    newArgTabEntry = argTable[inx];
                    assert(newArgTabEntry != nullptr);

                    // update the "node" GenTreePtr fields in the newArgTabEntry
                    //
                    assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field

                    newArgTabEntry->node = newCurr;
                    break;
                }
            }
        }
    }

    // Finally copy the summary state; the new table is in the same completed and
    // sorted state as the one it was cloned from.
    argCount     = oldArgInfo->argCount;
    nextSlotNum  = oldArgInfo->nextSlotNum;
    hasRegArgs   = oldArgInfo->hasRegArgs;
    hasStackArgs = oldArgInfo->hasStackArgs;
    argsComplete = true;
    argsSorted   = true;
}

// Append an already initialized entry to the end of the argument table.
//
//   curArgTabEntry - the entry to store in the next free slot
//
// The table must still have room for it (asserted).
void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
{
    assert(argCount < argTableSize);
    argTable[argCount++] = curArgTabEntry;
}

// Create a table entry for an argument that is passed in register(s) and append it to the table.
//
//   argNum    - the argument number recorded in the entry
//   node      - the tree recorded as the argument's node
//   parent    - the tree recorded as the argument's parent
//   regNum    - the register recorded for the argument
//   numRegs   - the number of registers recorded for the argument
//   alignment - the alignment recorded for the argument
//
// Returns the newly created entry. As a side effect, hasRegArgs becomes true.
fgArgTabEntryPtr fgArgInfo::AddRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr regEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    // Caller supplied description of the argument.
    regEntry->argNum    = argNum;
    regEntry->node      = node;
    regEntry->parent    = parent;
    regEntry->alignment = alignment;

    // Location: register(s) only, so no stack slots are recorded.
    regEntry->regNum   = regNum;
    regEntry->numRegs  = numRegs;
    regEntry->slotNum  = 0;
    regEntry->numSlots = 0;

    // No late arg index and no temp have been assigned yet.
    regEntry->lateArgInx = (unsigned)-1;
    regEntry->tmpNum     = (unsigned)-1;

    // Every state flag starts out cleared.
    regEntry->isSplit       = false;
    regEntry->isTmp         = false;
    regEntry->needTmp       = false;
    regEntry->needPlace     = false;
    regEntry->processed     = false;
    regEntry->isHfaRegArg   = false;
    regEntry->isBackFilled  = false;
    regEntry->isNonStandard = false;

    hasRegArgs = true;
    AddArg(regEntry);

    return regEntry;
}

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Overload of AddRegArg that additionally records struct passing information in the entry.
//
//   argNum .. alignment - forwarded unchanged to the basic AddRegArg overload
//   isStruct            - whether the argument is a struct; must agree with the type of 'node' (asserted)
//   otherRegNum         - the second register recorded for the struct
//   structDescPtr       - optional struct descriptor; copied into the entry only when 'isStruct'
//                         is true and the pointer is non-null
//
// Returns the newly created entry.
fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned                                                         argNum,
                                      GenTreePtr                                                       node,
                                      GenTreePtr                                                       parent,
                                      regNumber                                                        regNum,
                                      unsigned                                                         numRegs,
                                      unsigned                                                         alignment,
                                      const bool                                                       isStruct,
                                      const regNumber                                                  otherRegNum,
                                      const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
{
    // The basic overload creates the entry and appends it to the table.
    fgArgTabEntryPtr structEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
    assert(structEntry != nullptr);

    // The node of the entry can change after remorphing (for example it can be rewritten to a
    // cpyblk or to a PlaceHolder node when a late argument is needed), so whether this is a struct
    // is kept in a separate flag. At creation time the node's type is still accurate, and this
    // assert enforces that the flag agrees with it.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));

    structEntry->isStruct    = isStruct;    // is this a struct arg
    structEntry->otherRegNum = otherRegNum; // Second reg for the struct

    if (isStruct)
    {
        if (structDescPtr != nullptr)
        {
            structEntry->structDesc.CopyFrom(*structDescPtr);
        }
    }

    return structEntry;
}
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

// Create a table entry for an argument that is passed on the stack and append it to the table.
//
//   argNum    - the argument number recorded in the entry
//   node      - the tree recorded as the argument's node
//   parent    - the tree recorded as the argument's parent
//   numSlots  - the number of stack slots the argument occupies
//   alignment - the slot alignment; nextSlotNum is rounded up to it before the slot is assigned
//   isStruct  - (only with FEATURE_UNIX_AMD64_STRUCT_PASSING) whether the argument is a struct
//
// Returns the newly created entry. As side effects, hasStackArgs becomes true and
// nextSlotNum is advanced past the slots used by this argument.
fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned   argNum,
                                      GenTreePtr node,
                                      GenTreePtr parent,
                                      unsigned   numSlots,
                                      unsigned   alignment
                                          FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
{
    fgArgTabEntryPtr stkEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;

    // Align the running slot counter first: it becomes this argument's first slot.
    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // The node of the entry can change after remorphing (for example it can be rewritten to a
    // cpyblk or to a PlaceHolder node when a late argument is needed), so whether this is a struct
    // is kept in a separate flag. At creation time the node's type is still accurate, and this
    // assert enforces that the flag agrees with it.
    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
    stkEntry->isStruct = isStruct; // is this a struct arg
#endif                             // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

    // Caller supplied description of the argument.
    stkEntry->argNum    = argNum;
    stkEntry->node      = node;
    stkEntry->parent    = parent;
    stkEntry->alignment = alignment;

    // Location: stack only, so no registers are recorded.
    stkEntry->regNum   = REG_STK;
    stkEntry->numRegs  = 0;
    stkEntry->slotNum  = nextSlotNum;
    stkEntry->numSlots = numSlots;

    // No late arg index and no temp have been assigned yet.
    stkEntry->lateArgInx = (unsigned)-1;
    stkEntry->tmpNum     = (unsigned)-1;

    // Every state flag starts out cleared.
    stkEntry->isSplit       = false;
    stkEntry->isTmp         = false;
    stkEntry->needTmp       = false;
    stkEntry->needPlace     = false;
    stkEntry->processed     = false;
    stkEntry->isHfaRegArg   = false;
    stkEntry->isBackFilled  = false;
    stkEntry->isNonStandard = false;

    hasStackArgs = true;
    AddArg(stkEntry);

    // The next stack argument starts after the slots consumed here.
    nextSlotNum += numSlots;

    return stkEntry;
}

void fgArgInfo::RemorphReset()
{
    nextSlotNum = INIT_ARG_STACK_SLOT;
}

// Revisit the existing table entry of a register argument while its call is being remorphed.
//
//   argNum    - the argument number used to look up the entry
//   node      - the tree the caller currently sees for this argument
//   parent    - the expected parent of the entry (asserted)
//   regNum    - the expected register (asserted); for a non-standard arg the table's value wins
//   numRegs   - not examined by this method
//   alignment - the expected alignment (asserted)
//
// Returns the entry for 'argNum'. If the entry's node differs from 'node', the entry's node
// is re-pointed at the corresponding element of the call's gtCallLateArgs list.
fgArgTabEntry* fgArgInfo::RemorphRegArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
{
    fgArgTabEntryPtr entry        = nullptr;
    unsigned         lateArgsSeen = 0;

    // Locate the entry, counting how many of the entries in front of it are late args.
    for (unsigned tabInx = 0; tabInx < argCount; tabInx++)
    {
        entry = argTable[tabInx];
        if (entry->argNum == argNum)
        {
            break;
        }

        // An entry without a parent always counts; otherwise the flag on the
        // parent's current element decides.
        bool inLateList = true;
        if (entry->parent != nullptr)
        {
            assert(entry->parent->OperIsList());
            inLateList = (entry->parent->Current()->gtFlags & GTF_LATE_ARG) != 0;
        }

        if (inLateList)
        {
            lateArgsSeen++;
        }
    }

    // if this was a nonstandard arg the table is definitive
    if (entry->isNonStandard)
    {
        regNum = entry->regNum;
    }

    // The caller's view of the argument has to agree with what was recorded.
    assert(entry->argNum == argNum);
    assert(entry->regNum == regNum);
    assert(entry->alignment == alignment);
    assert(entry->parent == parent);

    if (entry->node != node)
    {
        GenTreePtr      lateNode  = nullptr;
        unsigned        lateIndex = 0;
        GenTreeArgList* lateList  = callTree->gtCall.gtCallLateArgs;

        // Advance through the late argument list up to position 'lateArgsSeen'.
        while (lateList)
        {
            lateNode = lateList->Current();
            assert(!lateNode->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
            if (lateIndex == lateArgsSeen)
            {
                break;
            }
            lateIndex++;
            lateList = lateList->Rest();
        }
        assert(lateIndex == lateArgsSeen);
        assert(lateArgsSeen == entry->lateArgInx);

        entry->node = lateNode;
    }

    return entry;
}

void fgArgInfo::RemorphStkArg(
    unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    bool             isRegArg       = false;
    unsigned         regArgInx      = 0;
    GenTreePtr       argx;
    unsigned         inx;

    for (inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];

        if (curArgTabEntry->parent != nullptr)
        {
            assert(curArgTabEntry->parent->OperIsList());
            argx     = curArgTabEntry->parent->Current();
            isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
        }
        else
        {
            argx     = curArgTabEntry->node;
            isRegArg = true;
        }

        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }

        if (isRegArg)
        {
            regArgInx++;
        }
    }

    nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);

    assert(curArgTabEntry->argNum == argNum);
    assert(curArgTabEntry->slotNum == nextSlotNum);
    assert(curArgTabEntry->numSlots == numSlots);
    assert(curArgTabEntry->alignment == alignment);
    assert(curArgTabEntry->parent == parent);
    assert(parent->OperIsList());

#if FEATURE_FIXED_OUT_ARGS
    if (curArgTabEntry->node != node)
    {
        if (isRegArg)
        {
            GenTreePtr argx     = nullptr;
            unsigned   regIndex = 0;

            /* process the register argument list */
            for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
            {
                argx = list->Current();
                assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
                if (regIndex == regArgInx)
                {
                    break;
                }
            }
            assert(regIndex == regArgInx);
            assert(regArgInx == curArgTabEntry->lateArgInx);

            if (curArgTabEntry->node != argx)
            {
                curArgTabEntry->node = argx;
            }
        }
        else
        {
            assert(parent->Current() == node);
            curArgTabEntry->node = node;
        }
    }
#else
    curArgTabEntry->node = node;
#endif

    nextSlotNum += numSlots;
}

void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }

    assert(numRegs > 0);
    assert(numSlots > 0);

    curArgTabEntry->isSplit  = true;
    curArgTabEntry->numRegs  = numRegs;
    curArgTabEntry->numSlots = numSlots;

    nextSlotNum += numSlots;
}

void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
{
    fgArgTabEntryPtr curArgTabEntry = nullptr;
    assert(argNum < argCount);
    for (unsigned inx = 0; inx < argCount; inx++)
    {
        curArgTabEntry = argTable[inx];
        if (curArgTabEntry->argNum == argNum)
        {
            break;
        }
    }
    assert(curArgTabEntry->parent->Current() == newNode);

    curArgTabEntry->node   = newNode;
    curArgTabEntry->tmpNum = tmpNum;
    curArgTabEntry->isTmp  = true;
}

// Called once all arguments have been added to the table: decide, for every entry, whether the
// argument has to be evaluated into a temp (needTmp) or replaced by a placeholder (needPlace)
// so that side effects keep their order, and finally set argsComplete.
//
// The work is done in two passes over the table:
//   1. For each argument, look at the GTF_ASG / GTF_CALL flags (and, for some targets, at
//      struct arguments) and mark this argument and/or earlier arguments.
//   2. If there are stack args (or struct register args together with qmarks), re-examine the
//      register arguments that are not yet marked needTmp.
void fgArgInfo::ArgsComplete()
{
    // NOTE(review): this local has the same name as the hasStackArgs member that AddStkArg sets;
    // inside this method only the local is read and written.
    bool hasStackArgs    = false;
    bool hasStructRegArg = false;

    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
        assert(curArgTabEntry != nullptr);
        GenTreePtr argx = curArgTabEntry->node;

        if (curArgTabEntry->regNum == REG_STK)
        {
            hasStackArgs = true;
#if !FEATURE_FIXED_OUT_ARGS
            // On x86 we use push instructions to pass arguments:
            //   The non-register arguments are evaluated and pushed in order
            //   and they are never evaluated into temps
            //
            continue;
#endif
        }
        else // we have a register argument, next we look for a struct type.
        {
            if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
            {
                hasStructRegArg = true;
            }
        }

        /* If the argument tree contains an assignment (GTF_ASG) then the argument
           and every earlier argument (except constants) must be evaluated into temps
           since there may be other arguments that follow and they may use the value being assigned.

           EXAMPLE: ArgTab is "a, a=5, a"
                    -> when we see the second arg "a=5"
                       we know the first two arguments "a, a=5" have to be evaluated into temps

           For the case of an assignment, we only know that there exists some assignment someplace
           in the tree.  We don't know what is being assigned so we are very conservative here
           and assume that any local variable could have been assigned.
         */

        if (argx->gtFlags & GTF_ASG)
        {
            // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
            // a tmp, then we need a temp in the late arg list.
            //
            // NOTE(review): the guard below is "#ifdef", while every other use of this macro in this
            // method is "#if". If FEATURE_FIXED_OUT_ARGS is defined as 0 on some targets, the isTmp
            // clause is still compiled in there - confirm that this is intended.
            if ((argCount > 1) || argx->OperIsCopyBlkOp()
#ifdef FEATURE_FIXED_OUT_ARGS
                || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
                                         // that we only have late non-register args when that feature is on.
#endif                                   // FEATURE_FIXED_OUT_ARGS
                )
            {
                curArgTabEntry->needTmp = true;
            }

            // For all previous arguments, unless they are a simple constant
            //  we require that they be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);

                assert(prevArgTabEntry->node);
                if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
                {
                    prevArgTabEntry->needTmp = true;
                }
            }
        }

#if FEATURE_FIXED_OUT_ARGS
        // Like calls, if this argument has a tree that will do an inline throw,
        // a call to a jit helper, then we need to treat it like a call (but only
        // if there are/were any stack args).
        // This means unnesting, sorting, etc.  Technically this is overly
        // conservative, but I want to avoid as much special-case debug-only code
        // as possible, so leveraging the GTF_CALL flag is the easiest.
        if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
            compiler->opts.compDbgCode &&
            (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
        {
            // Only promote to GTF_CALL when some *other* argument is passed on the stack.
            for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
            {
                if (otherInx == curInx)
                {
                    continue;
                }

                if (argTable[otherInx]->regNum == REG_STK)
                {
                    argx->gtFlags |= GTF_CALL;
                    break;
                }
            }
        }
#endif // FEATURE_FIXED_OUT_ARGS

        /* If it contains a call (GTF_CALL) then itself and everything before the call
           with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
           has to be kept in the right order since we will move the call to the first position)

           For calls we don't have to be quite as conservative as we are with an assignment
           since the call won't be modifying any non-address taken LclVars.
         */

        if (argx->gtFlags & GTF_CALL)
        {
            if (argCount > 1) // If this is not the only argument
            {
                curArgTabEntry->needTmp = true;
            }
            else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
            {
                // Spill all arguments that are floating point calls
                curArgTabEntry->needTmp = true;
            }

            // All previous arguments may need to be evaluated into temps
            for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
            {
                fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
                assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
                assert(prevArgTabEntry->node);

                // For all previous arguments, if they have any GTF_ALL_EFFECT
                //  we require that they be evaluated into a temp
                if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
                {
                    prevArgTabEntry->needTmp = true;
                }
#if FEATURE_FIXED_OUT_ARGS
                // Or, if they are stored into the FIXED_OUT_ARG area
                // we require that they be moved to the gtCallLateArgs
                // and replaced with a placeholder node
                else if (prevArgTabEntry->regNum == REG_STK)
                {
                    prevArgTabEntry->needPlace = true;
                }
#endif
            }
        }

#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
        // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
        // with multiple indirections, so here we consider spilling it into a tmp LclVar.
        //
        // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
        // so we skip this for ARM32 until it is ported to use RyuJIT backend
        //

        bool isMultiRegArg = (curArgTabEntry->numRegs > 1);

        if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
        {
            if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
            {
                // Spill multireg struct arguments that have Assignments or Calls embedded in them
                curArgTabEntry->needTmp = true;
            }
            else
            {
                // We call gtPrepareCost to measure the cost of evaluating this tree
                compiler->gtPrepareCost(argx);

                if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
                {
                    // Spill multireg struct arguments that are expensive to evaluate twice
                    curArgTabEntry->needTmp = true;
                }
                else if (argx->OperGet() == GT_OBJ)
                {
                    GenTreeObj*          argObj     = argx->AsObj();
                    CORINFO_CLASS_HANDLE objClass   = argObj->gtClass;
                    unsigned             structSize = compiler->info.compCompHnd->getClassSize(objClass);
                    switch (structSize)
                    {
                        case 3:
                        case 5:
                        case 6:
                        case 7:
                            // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
                            //
                            if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
                            {
                                // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
                                // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
                                //
                                curArgTabEntry->needTmp = true;
                            }
                            break;

                        case 11:
                        case 13:
                        case 14:
                        case 15:
                            // Spill any GT_OBJ multireg structs that are difficult to extract
                            //
                            // When we have a GT_OBJ of a struct with the above sizes we would need
                            // to use 3 or 4 load instructions to load the exact size of this struct.
                            // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
                            // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
                            // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
                            // the argument.
                            //
                            curArgTabEntry->needTmp = true;
                            break;

                        default:
                            break;
                    }
                }
            }
        }
#endif // FEATURE_MULTIREG_ARGS
#endif // LEGACY_BACKEND
    }

    // We only care because we can't spill structs and qmarks involve a lot of spilling, but
    // if we don't have qmarks, then it doesn't matter.
    // So check for Qmark's globally once here, instead of inside the loop.
    //
    const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);

#if FEATURE_FIXED_OUT_ARGS

    // For Arm/x64 we only care because we can't reorder a register
    // argument that uses GT_LCLHEAP.  This is an optimization to
    // save a check inside the below loop.
    //
    const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);

#else

    const bool hasStackArgsWeCareAbout = hasStackArgs;

#endif // FEATURE_FIXED_OUT_ARGS

    // If we have any stack args we have to force the evaluation
    // of any arguments passed in registers that might throw an exception
    //
    // Technically we are only required to handle the following two cases:
    //     a GT_IND with GTF_IND_RNGCHK (only on x86) or
    //     a GT_LCLHEAP node that allocates stuff on the stack
    //
    if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
    {
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
            assert(curArgTabEntry != nullptr);
            GenTreePtr argx = curArgTabEntry->node;

            // Examine the register args that are currently not marked needTmp
            //
            if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
            {
                if (hasStackArgsWeCareAbout)
                {
#if !FEATURE_FIXED_OUT_ARGS
                    // On x86 we previously recorded a stack depth of zero when
                    // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
                    // Thus we can not reorder the argument after any stack based argument
                    // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
                    // check for it explicitly)
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
#else
                    // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
                    //
                    if (argx->gtFlags & GTF_EXCEPT)
                    {
                        assert(compiler->compLocallocUsed);

                        // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
                        //
                        if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
                        {
                            curArgTabEntry->needTmp = true;
                            continue;
                        }
                    }
#endif
                }
                if (hasStructRegArgWeCareAbout)
                {
                    // Returns WALK_ABORT if a GT_QMARK node is encountered in the argx tree
                    //
                    if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
                    {
                        curArgTabEntry->needTmp = true;
                        continue;
                    }
                }
            }
        }
    }

    // From here on the table may be sorted (SortArgs asserts this flag).
    argsComplete = true;
}

//------------------------------------------------------------------------------
// SortArgs: Reorder the entries of argTable, in place, into the order in which
//           the late arguments should be evaluated.
//
// Notes:
//    Asserts that argsComplete is set on entry and sets argsSorted on exit.
//
//    The table is partitioned using two cursors: begTab grows upward from
//    index 0 and endTab shrinks downward from argCount - 1. Each pass below
//    picks one category of argument, swaps every matching entry to the
//    appropriate end of the still-unsorted window [begTab..endTab], marks the
//    entry as 'processed' and shrinks the window by one.
//
//    NOTE(review): the first pass indexes argTable[argCount - 1] unconditionally,
//    so this appears to assume argCount > 0 - confirm against the callers.
//
void fgArgInfo::SortArgs()
{
    assert(argsComplete == true);

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("\nSorting the arguments:\n");
    }
#endif

    /* Shuffle the arguments around before we build the gtCallLateArgs list.
       The idea is to move all "simple" arguments like constants and local vars
       to the end of the table, and move the complex arguments towards the beginning
       of the table. This will help prevent registers from being spilled by
       allowing us to evaluate the more complex arguments before the simpler arguments.
       The argTable ends up looking like:
           +------------------------------------+  <--- argTable[argCount - 1]
           |          constants                 |
           +------------------------------------+
           |    local var / local field         |
           +------------------------------------+
           | remaining arguments sorted by cost |
           +------------------------------------+
           | temps (argTable[].needTmp = true)  |
           +------------------------------------+
           |  args with calls (GTF_CALL)        |
           +------------------------------------+  <--- argTable[0]
     */

    /* Set the beginning and end for the new argument table */
    // regCount      - number of entries that are not REG_STK (counted during the first pass)
    // begTab/endTab - inclusive bounds of the part of the table that is still unsorted
    // argsRemaining - number of entries that have not yet been placed
    unsigned curInx;
    int      regCount      = 0;
    unsigned begTab        = 0;
    unsigned endTab        = argCount - 1;
    unsigned argsRemaining = argCount;

    // First take care of arguments that are constants.
    // [We use a backward iterator pattern]
    //
    // This pass visits every entry exactly once, so it is also where the
    // register arguments are counted.
    //
    curInx = argCount;
    do
    {
        curInx--;

        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            regCount++;
        }

        // Skip any already processed args
        //
        if (!curArgTabEntry->processed)
        {
            GenTreePtr argx = curArgTabEntry->node;

            // put constants at the end of the table
            // (only GT_CNS_INT nodes are treated as constants here)
            //
            if (argx->gtOper == GT_CNS_INT)
            {
                noway_assert(curInx <= endTab);

                curArgTabEntry->processed = true;

                // place curArgTabEntry at the endTab position by performing a swap
                //
                if (curInx != endTab)
                {
                    argTable[curInx] = argTable[endTab];
                    argTable[endTab] = curArgTabEntry;
                }

                endTab--;
                argsRemaining--;
            }
        }
    } while (curInx > 0);

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are calls.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // put calls at the beginning of the table
                //
                if (argx->gtFlags & GTF_CALL)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of arguments that are temps.
        // These temps come before the arguments that are
        // ordinary local vars or local fields
        // since this will give them a better chance to become
        // enregistered into their actual argument register.
        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                if (curArgTabEntry->needTmp)
                {
                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the begTab position by performing a swap
                    //
                    if (curInx != begTab)
                    {
                        argTable[curInx] = argTable[begTab];
                        argTable[begTab] = curArgTabEntry;
                    }

                    begTab++;
                    argsRemaining--;
                }
            }
        }
    }

    if (argsRemaining > 0)
    {
        // Next take care of local var and local field arguments.
        // These are moved towards the end of the argument evaluation.
        // [We use a backward iterator pattern]
        //
        // The scan starts at endTab and stops once begTab has been visited.
        //
        curInx = endTab + 1;
        do
        {
            curInx--;

            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
                {
                    noway_assert(curInx <= endTab);

                    curArgTabEntry->processed = true;

                    // place curArgTabEntry at the endTab position by performing a swap
                    //
                    if (curInx != endTab)
                    {
                        argTable[curInx] = argTable[endTab];
                        argTable[endTab] = curArgTabEntry;
                    }

                    endTab--;
                    argsRemaining--;
                }
            }
        } while (curInx > begTab);
    }

    // Finally, take care of all the remaining arguments.
    // Note that we fill in one arg at a time using a while loop:
    // each iteration selects the remaining arg with the largest gtCostEx
    // and moves it to begTab.
    bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
    while (argsRemaining > 0)
    {
        /* Find the most expensive arg remaining and evaluate it next */

        // expensiveArg stays UINT_MAX until a candidate has been chosen.
        // Because expensiveArgCost starts at zero and the comparison below is
        // strict, a candidate is only chosen by cost if its gtCostEx is non-zero.
        fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
        unsigned         expensiveArg         = UINT_MAX;
        unsigned         expensiveArgCost     = 0;

        // [We use a forward iterator pattern]
        //
        for (curInx = begTab; curInx <= endTab; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            // Skip any already processed args
            //
            if (!curArgTabEntry->processed)
            {
                GenTreePtr argx = curArgTabEntry->node;

                // We should have already handled these kinds of args
                assert(argx->gtOper != GT_LCL_VAR);
                assert(argx->gtOper != GT_LCL_FLD);
                assert(argx->gtOper != GT_CNS_INT);

                // This arg should either have no persistent side effects or be the last one in our table
                // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));

                if (argsRemaining == 1)
                {
                    // This is the last arg to place; it is taken regardless of its cost
                    expensiveArg         = curInx;
                    expensiveArgTabEntry = curArgTabEntry;
                    assert(begTab == endTab);
                    break;
                }
                else
                {
                    if (!costsPrepared)
                    {
                        /* We call gtPrepareCost to measure the cost of evaluating this tree */
                        compiler->gtPrepareCost(argx);
                    }

                    if (argx->gtCostEx > expensiveArgCost)
                    {
                        // Remember this arg as the most expensive one that we have yet seen
                        expensiveArgCost     = argx->gtCostEx;
                        expensiveArg         = curInx;
                        expensiveArgTabEntry = curArgTabEntry;
                    }
                }
            }
        }

        // Some arg must have been selected by the scan above
        noway_assert(expensiveArg != UINT_MAX);

        // put the most expensive arg towards the beginning of the table

        expensiveArgTabEntry->processed = true;

        // place expensiveArgTabEntry at the begTab position by performing a swap
        //
        if (expensiveArg != begTab)
        {
            argTable[expensiveArg] = argTable[begTab];
            argTable[begTab]       = expensiveArgTabEntry;
        }

        begTab++;
        argsRemaining--;

        costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
    }

    // The table should now be completely filled and thus begTab should now be adjacent to endTab
    // and argsRemaining should be zero
    assert(begTab == (endTab + 1));
    assert(argsRemaining == 0);

#if !FEATURE_FIXED_OUT_ARGS
    // Finally build the regArgList
    //
    // NOTE(review): regArgList is assigned NULL immediately before it is indexed
    // in the loop below; its declaration is not visible here - confirm that
    // this is intended.
    //
    callTree->gtCall.regArgList      = NULL;
    callTree->gtCall.regArgListCount = regCount;

    unsigned regInx = 0;
    for (curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        if (curArgTabEntry->regNum != REG_STK)
        {
            // Record the argument register in the next slot of regArgList,
            // following the (now sorted) table order
            //
            callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
            regInx++;
        }
    }
#endif // !FEATURE_FIXED_OUT_ARGS

    argsSorted = true;
}

//------------------------------------------------------------------------------
// fgMakeTmpArgNode : Create the tree that reads an already existing argument
//                    temp, suitable for placing in the late argument list.
//                    We need this to be done in order to enforce ordering
//                    of the evaluation of arguments.
//
// Arguments:
//    tmpVarNum         - the lclVar number of the temp to read; the local is
//                        asserted to be marked lvIsTemp.
//    passedInRegisters - only used as a parameter when
//                        FEATURE_UNIX_AMD64_STRUCT_PASSING is defined: whether the
//                        struct is passed in registers. On other AMD64/ARM64
//                        configurations the equivalent value is computed locally.
//
// Return Value:
//    A new tree referencing the temp:
//      - for a non-struct temp, a GT_LCL_VAR node;
//      - for a struct temp on AMD64/ARM64 that is passed in registers, the node
//        retyped as a GT_LCL_FLD;
//      - for a struct temp on AMD64/ARM64 that is not passed in registers, a
//        GT_ADDR of the local (wrapped in a GT_OBJ for ARM64 multireg structs);
//      - for a struct temp on all other targets, a GT_OBJ over the GT_ADDR of
//        the local.
//
// Notes:
//    Whenever the address of the temp is taken, the temp is marked as address
//    exposed and the GT_ADDR operand is flagged GTF_DONT_CSE.

GenTreePtr Compiler::fgMakeTmpArgNode(
    unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
{
    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
    assert(varDsc->lvIsTemp);
    var_types type = varDsc->TypeGet();

    // Create a copy of the temp to go into the late argument list
    GenTreePtr arg      = gtNewLclvNode(tmpVarNum, type);
    GenTreePtr addrNode = nullptr; // set only when we take the address of the temp

    if (varTypeIsStruct(type))
    {

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

        arg->gtFlags |= GTF_DONT_CSE;

#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // Can this type be passed in a single register?
        // If so, the following call will return the corresponding primitive type.
        // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.

        bool                 passedInRegisters = false;
        CORINFO_CLASS_HANDLE clsHnd            = varDsc->lvVerTypeInfo.GetClassHandle();
        var_types            structBaseType    = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);

        if (structBaseType != TYP_UNKNOWN)
        {
            passedInRegisters = true;
            type              = structBaseType;
        }
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

        // If it is passed in registers, don't get the address of the var. Make it a
        // field instead. It will be loaded in registers with putarg_reg tree in lower.
        if (passedInRegisters)
        {
            arg->ChangeOper(GT_LCL_FLD);
            arg->gtType = type;
        }
        else
        {
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
            var_types addrType = type;
#else
            var_types addrType = TYP_BYREF;
#endif
            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
            addrNode = arg;

#if FEATURE_MULTIREG_ARGS
#ifdef _TARGET_ARM64_
            assert(varTypeIsStruct(type));
            if (lvaIsMultiregStruct(varDsc))
            {
                // ToDo-ARM64: Consider using:  arg->ChangeOper(GT_LCL_FLD);
                // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
                // We will create a GT_OBJ for the argument below.
                // This will be passed by value in two registers.
                assert(addrNode != nullptr);

                // Create an Obj of the temp to use it as a call argument.
                arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);

                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
                // this is only to preserve former behavior (though some CSE'ing of struct
                // values can be pessimizing, so enabling this may require some additional tuning).
                arg->gtFlags |= GTF_DONT_CSE;
            }
#endif // _TARGET_ARM64_
#endif // FEATURE_MULTIREG_ARGS
        }

#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)

        // other targets, we pass the struct by value
        assert(varTypeIsStruct(type));

        addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);

        // Get a new Obj node temp to use it as a call argument.
        // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
        arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);

#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)

    } // (varTypeIsStruct(type))

    if (addrNode != nullptr)
    {
        assert(addrNode->gtOper == GT_ADDR);

        // This will prevent this LclVar from being optimized away
        lvaSetVarAddrExposed(tmpVarNum);

        // the child of a GT_ADDR is required to have the GTF_DONT_CSE flag set
        addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
    }

    return arg;
}

//------------------------------------------------------------------------------
// EvalArgsToTemps: Walk the sorted argument table and build the call's late
//                  argument list (gtCallLateArgs).
//
// Notes:
//    Asserts that argsSorted is set on entry.
//
//    For every table entry that is not skipped, two trees are produced:
//      - 'defArg'   is appended to gtCallLateArgs and becomes the entry's node;
//      - 'setupArg' (when non-null) replaces the original node in the early
//                   argument list, or in gtCallObjp when the entry has no parent.
//
//    Entries with needTmp set are evaluated into a temp: setupArg is the store
//    to the temp and defArg is a read of it (GT_MKREFANY gets special handling
//    so that only one operand is spilled). Entries without needTmp are moved
//    whole into the late list and a placeholder node is left in their place.
//
//    Each entry placed in the late list is given the next lateArgInx, in table order.
//
void fgArgInfo::EvalArgsToTemps()
{
    assert(argsSorted == true);

    unsigned regArgInx = 0; // index of the next entry to be appended to the late argument list
    // Now go through the argument table and perform the necessary evaluation into temps
    GenTreeArgList* tmpRegArgNext = nullptr; // tail of the late argument list built so far
    for (unsigned curInx = 0; curInx < argCount; curInx++)
    {
        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

        GenTreePtr argx     = curArgTabEntry->node;
        GenTreePtr setupArg = nullptr;
        GenTreePtr defArg;

#if !FEATURE_FIXED_OUT_ARGS
        // Only ever set for FEATURE_FIXED_OUT_ARGS
        assert(curArgTabEntry->needPlace == false);

        // On x86 and other archs that use push instructions to pass arguments:
        //   Only the register arguments need to be replaced with placeholder nodes.
        //   Stacked arguments are evaluated and pushed (or stored into the stack) in order.
        //
        if (curArgTabEntry->regNum == REG_STK)
            continue;
#endif

        if (curArgTabEntry->needTmp)
        {
            unsigned tmpVarNum;

            if (curArgTabEntry->isTmp == true)
            {
                // The entry already has a temp (tmpNum).
                // Create a copy of the temp to go into the late argument list.
                // setupArg is left null here, so the original node stays where it is
                // in the early list.
                tmpVarNum = curArgTabEntry->tmpNum;
                defArg    = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
                    argTable[curInx]->structDesc.passedInRegisters));

                // mark the original node as a late argument
                argx->gtFlags |= GTF_LATE_ARG;
            }
            else
            {
                // Create a temp assignment for the argument
                // Put the temp in the gtCallLateArgs list
                CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("Argument with 'side effect'...\n");
                    compiler->gtDispTree(argx);
                }
#endif

#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                noway_assert(argx->gtType != TYP_STRUCT);
#endif

                tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
                if (argx->gtOper == GT_MKREFANY)
                {
                    // For GT_MKREFANY, typically the actual struct copying does
                    // not have any side-effects and can be delayed. So instead
                    // of using a temp for the whole struct, we can just use a temp
                    // for the operand that has a side-effect.
                    // If both operands have effect flags set, neither branch below is
                    // taken and the whole node is spilled by the general path further down.
                    GenTreePtr operand;
                    if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        // op2 has no effect flags, so op1 is the operand to spill
                        operand = argx->gtOp.gtOp1;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                    else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
                    {
                        // op1 has no effect flags, so op2 is the operand to spill
                        operand = argx->gtOp.gtOp2;

                        // In the early argument evaluation, place an assignment to the temp
                        // from the source operand of the mkrefany
                        setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);

                        // Replace the operand for the mkrefany with the new temp.
                        argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
                    }
                }

                if (setupArg != nullptr)
                {
                    // Now keep the mkrefany for the late argument list
                    defArg = argx;

                    // Clear the side-effect flags because now both op1 and op2 have no side-effects
                    defArg->gtFlags &= ~GTF_ALL_EFFECT;
                }
                else
                {
                    // General case: store the whole argument into the temp
                    setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);

                    LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;

#ifndef LEGACY_BACKEND
                    if (compiler->fgOrder == Compiler::FGOrderLinear)
                    {
                        // We'll reference this temporary variable just once
                        // when we perform the function call after
                        // setting up this argument.
                        varDsc->lvRefCnt = 1;
                    }
#endif // !LEGACY_BACKEND

                    var_types lclVarType = genActualType(argx->gtType);
                    var_types scalarType = TYP_UNKNOWN;

                    if (setupArg->OperIsCopyBlkOp())
                    {
                        setupArg = compiler->fgMorphCopyBlock(setupArg);
#ifdef _TARGET_ARM64_
                        // This scalar LclVar widening step is only performed for ARM64
                        //
                        CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
                        unsigned             structSize = varDsc->lvExactSize;

                        scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
#endif // _TARGET_ARM64_
                    }

                    // scalarType can be set to a wider type for ARM64: (3 => 4)  or (5,6,7 => 8)
                    if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
                    {
                        // Create a GT_LCL_FLD using the wider type to go to the late argument list
                        defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
                    }
                    else
                    {
                        // Create a copy of the temp to go to the late argument list
                        defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
                    }

                    // Remember that this entry now lives in a temp
                    curArgTabEntry->isTmp  = true;
                    curArgTabEntry->tmpNum = tmpVarNum;

#ifdef _TARGET_ARM_
                    // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
                    // might have left holes in the used registers (see
                    // fgAddSkippedRegsInPromotedStructArg).
                    // Too bad we're not that smart for these intermediate temps...
                    if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
                    {
                        // Build the mask of all numRegs consecutive argument registers,
                        // starting at regNum
                        regNumber argReg      = curArgTabEntry->regNum;
                        regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
                        for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
                        {
                            argReg = genRegArgNext(argReg);
                            allUsedRegs |= genRegMask(argReg);
                        }
#ifdef LEGACY_BACKEND
                        callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
#endif // LEGACY_BACKEND
                    }
#endif // _TARGET_ARM_
                }

                /* mark the assignment as a late argument */
                setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
                if (compiler->verbose)
                {
                    printf("\n  Evaluate to a temp:\n");
                    compiler->gtDispTree(setupArg);
                }
#endif
            }
        }
        else // curArgTabEntry->needTmp == false
        {
            //   On x86 -
            //      Only register args are replaced with placeholder nodes
            //      and the stack based arguments are evaluated and pushed in order.
            //
            //   On Arm/x64 - When needTmp is false and needPlace is false,
            //      the non-register arguments are evaluated and stored in order.
            //      When needPlace is true we have a nested call that comes after
            //      this argument so we have to replace it in the gtCallArgs list
            //      (the initial argument evaluation list) with a placeholder.
            //
            if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
            {
                continue;
            }

            /* No temp needed - move the whole node to the gtCallLateArgs list */

            /* The argument is deferred and put in the late argument list */

            defArg = argx;

            // Create a placeholder node to take its place in the early argument list.

            // For a struct type we also need to record the class handle of the arg.
            CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;

#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

            // All structs are either passed (and retyped) as integral types, OR they
            // are passed by reference.
            noway_assert(argx->gtType != TYP_STRUCT);

#else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

            if (varTypeIsStruct(defArg))
            {
                // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
                GenTreePtr defArgTmp = defArg;

                // The GT_OBJ may be a child of a GT_COMMA.
                while (defArgTmp->gtOper == GT_COMMA)
                {
                    defArgTmp = defArgTmp->gtOp.gtOp2;
                }
                assert(varTypeIsStruct(defArgTmp));

                // We handle two opcodes: GT_MKREFANY and GT_OBJ.
                if (defArgTmp->gtOper == GT_MKREFANY)
                {
                    clsHnd = compiler->impGetRefAnyClass();
                }
                else if (defArgTmp->gtOper == GT_OBJ)
                {
                    clsHnd = defArgTmp->AsObj()->gtClass;
                }
                else
                {
                    BADCODE("Unhandled struct argument tree in fgMorphArgs");
                }
            }

#endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))

            setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);

            /* mark the placeholder node as a late argument */
            setupArg->gtFlags |= GTF_LATE_ARG;

#ifdef DEBUG
            if (compiler->verbose)
            {
                if (curArgTabEntry->regNum == REG_STK)
                {
                    printf("Deferred stack argument :\n");
                }
                else
                {
                    printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
                }

                compiler->gtDispTree(argx);
                printf("Replaced with placeholder node:\n");
                compiler->gtDispTree(setupArg);
            }
#endif
        }

        // Splice setupArg into the early argument evaluation in place of argx.
        if (setupArg != nullptr)
        {
            if (curArgTabEntry->parent)
            {
                GenTreePtr parent = curArgTabEntry->parent;
                /* a normal argument from the list */
                noway_assert(parent->OperIsList());
                noway_assert(parent->gtOp.gtOp1 == argx);

                parent->gtOp.gtOp1 = setupArg;
            }
            else
            {
                /* must be the gtCallObjp */
                noway_assert(callTree->gtCall.gtCallObjp == argx);

                callTree->gtCall.gtCallObjp = setupArg;
            }
        }

        /* deferred arg goes into the late argument list */

        if (tmpRegArgNext == nullptr)
        {
            // First late argument: it becomes the head of gtCallLateArgs
            tmpRegArgNext                   = compiler->gtNewArgList(defArg);
            callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
        }
        else
        {
            // Append a new list node after the current tail and advance the tail
            noway_assert(tmpRegArgNext->OperIsList());
            noway_assert(tmpRegArgNext->Current());
            tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
            tmpRegArgNext             = tmpRegArgNext->Rest();
        }

        // The table entry now refers to the late argument
        curArgTabEntry->node       = defArg;
        curArgTabEntry->lateArgInx = regArgInx++;
    }

#ifdef DEBUG
    if (compiler->verbose)
    {
        // Dump the registers of the register arguments, in table order
        printf("\nShuffled argument table:    ");
        for (unsigned curInx = 0; curInx < argCount; curInx++)
        {
            fgArgTabEntryPtr curArgTabEntry = argTable[curInx];

            if (curArgTabEntry->regNum != REG_STK)
            {
                printf("%s ", getRegName(curArgTabEntry->regNum));
            }
        }
        printf("\n");
    }
#endif
}

// Return the node recorded in the argument table for the argument whose
// argNum equals argIndex.
// argIndex - 0-based position of the argument to look up.
//            The caller is responsible for making sure that this position
//            actually has a late arg; there is no "not found" result.
GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
{
    unsigned tabInx = 0;
    while (tabInx < this->ArgCount())
    {
        fgArgTabEntryPtr entry = this->ArgTable()[tabInx];
        if (entry->argNum == argIndex)
        {
            return entry->node;
        }
        tabInx++;
    }

    // No entry matched: the caller violated the contract above.
    unreached();
}

void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
    assert(!IsUninitialized(stkLvl));
    this->stkLevel = stkLvl;
}

unsigned fgArgInfo::RetrieveStkLevel()
{
    assert(!IsUninitialized(stkLevel));
    return stkLevel;
}

// Return a conservative estimate, in bytes, of the stack space used by the
// arguments of 'call'.
// It will be used only on the intercepted-for-host code path to copy the arguments.
//
// The estimate counts the entries of gtCallArgs, treats every entry beyond the
// first MAX_REG_ARG as a stack argument, and charges REGSIZE_BYTES for each.
int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
{
    // Count the entries in the call's argument list.
    int             argCnt = 0;
    GenTreeArgList* argLst = call->gtCallArgs;
    while (argLst)
    {
        argCnt++;
        argLst = argLst->Rest();
    }

    // Anything that does not fit in the argument registers goes on the stack.
    int stkArgCnt = 0;
    if (argCnt > MAX_REG_ARG)
    {
        stkArgCnt = argCnt - MAX_REG_ARG;
    }

    return stkArgCnt * REGSIZE_BYTES;
}

//------------------------------------------------------------------------------
// fgMakeMultiUse : Produce an additional use of the value computed by *pOp.
//                  If the node is a local, clone it and increase the ref count;
//                  otherwise insert a comma form temp.
//
// Arguments:
//    pOp  - a pointer to the child node for which a second use is wanted. When
//           the node is not a local, *pOp is replaced with the comma expression
//           that evaluates it to a temp and yields the temp.
//
// Return Value:
//    A fresh local node (a clone of *pOp, or a GT_LCL_VAR of the new temp)
//    which has not yet been linked into any tree.
//
// Assumption:
//    The result tree MUST be added to the tree structure since the ref counts are
//    already incremented.

GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
{
    GenTree* const origNode = *pOp;

    if (!origNode->IsLocal())
    {
        GenTree* tempUse = fgInsertCommaFormTemp(pOp);

        // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and tempUse = V01
        // Therefore, the ref count has to be incremented 3 times: twice for the uses
        // now under *pOp and once for tempUse, which the caller is expected to add.
        if (lvaLocalVarRefCounted)
        {
            const unsigned tempNum = tempUse->gtLclVarCommon.gtLclNum;
            for (int useInx = 0; useInx < 3; useInx++)
            {
                lvaTable[tempNum].incRefCnts(compCurBB->getBBWeight(this), this);
            }
        }

        return tempUse;
    }

    // The node is already a local: simply clone it and account for the new use.
    GenTree* localUse = gtClone(origNode);
    if (lvaLocalVarRefCounted)
    {
        lvaTable[origNode->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
    }
    return localUse;
}

//------------------------------------------------------------------------------
// fgInsertCommaFormTemp: Grab a new temp to hold the value of *ppTree, and
//                        rewrite *ppTree as comma(asg(newLcl, *ppTree), newLcl)
//
// Arguments:
//    ppTree     - a pointer to the child node that gets replaced by the comma
//                 expression, which evaluates the original tree into the temp
//                 and yields the temp.
//
//    structType - value type handle for the temp; asserted to be non-null when
//                 the tree is of a struct type.
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//

GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
{
    GenTree* const origTree = *ppTree;

    const unsigned tempNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));

    if (varTypeIsStruct(origTree))
    {
        assert(structType != nullptr);
        lvaSetStruct(tempNum, structType, false);
    }

    // If origTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
    // The type of GT_COPYBLK is TYP_VOID.  Therefore, we should use origTree->TypeGet()
    // (and not the type of the store) when typing the lcl var nodes and the comma.
    GenTree* store = gtNewTempAssign(tempNum, origTree);

    const var_types valueType = origTree->TypeGet();

    // The use that lives under the comma...
    GenTree* firstUse = new (this, GT_LCL_VAR) GenTreeLclVar(valueType, tempNum, BAD_IL_OFFSET);

    *ppTree = gtNewOperNode(GT_COMMA, valueType, store, firstUse);

    // ...and the extra, not yet linked, use handed back to the caller.
    return new (this, GT_LCL_VAR) GenTreeLclVar(valueType, tempNum, BAD_IL_OFFSET);
}

//------------------------------------------------------------------------
// fgMorphArgs: Walk and transform (morph) the arguments of a call
//
// Arguments:
//    callNode - the call for which we are doing the argument morphing
//
// Return Value:
//    Like most morph methods, this method returns the morphed node,
//    though in this case there are currently no scenarios where the
//    node itself is re-created.
//
// Notes:
//    This method is even less idempotent than most morph methods.
//    That is, it makes changes that should not be redone. It uses the existence
//    of gtCallLateArgs (the late arguments list) to determine if it has
//    already done that work.
//
//    The first time it is called (i.e. during global morphing), this method
//    computes the "late arguments". This is when it determines which arguments
//    need to be evaluated to temps prior to the main argument setup, and which
//    can be directly evaluated into the argument location. It also creates a
//    second argument list (gtCallLateArgs) that does the final placement of the
//    arguments, e.g. into registers or onto the stack.
//
//    The "non-late arguments", aka the gtCallArgs, are doing the in-order
//    evaluation of the arguments that might have side-effects, such as embedded
//    assignments, calls or possible throws. In these cases, it and earlier
//    arguments must be evaluated to temps.
//
//    On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
//    if we have any nested calls, we need to defer the copying of the argument
//    into the fixed argument area until after the call. If the argument did not
//    otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
//    replaced in the "early" arg list (gtCallArgs) with a placeholder node.

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
{
    GenTreePtr args;
    GenTreePtr argx;

    unsigned flagsSummary    = 0;
    unsigned genPtrArgCntSav = fgPtrArgCntCur;

    unsigned argIndex = 0;

    unsigned intArgRegNum = 0;
    unsigned fltArgRegNum = 0;

#ifdef _TARGET_ARM_
    regMaskTP argSkippedRegMask    = RBM_NONE;
    regMaskTP fltArgSkippedRegMask = RBM_NONE;
#endif //  _TARGET_ARM_

#if defined(_TARGET_X86_)
    unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
#else
    const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
#endif

    unsigned argSlots                = 0;
    unsigned nonRegPassedStructSlots = 0;
    bool     reMorphing              = call->AreArgsComplete();
    bool     callHasRetBuffArg       = call->HasRetBufArg();

#ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
    bool callIsVararg = call->IsVarargs();
#endif

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
    // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
    // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
    bool hasStackArgCopy = false;
#endif

#ifndef LEGACY_BACKEND
    // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
    // following the normal calling convention or in the normal argument registers. We either mark existing
    // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
    // non-standard arguments into the argument list, below.
    class NonStandardArgs
    {
        struct NonStandardArg
        {
            regNumber reg;  // The register to be assigned to this non-standard argument.
            GenTree*  node; // The tree node representing this non-standard argument.
                            //   Note that this must be updated if the tree node changes due to morphing!
        };

        ArrayStack<NonStandardArg> args;

    public:
        NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
        {
        }

        //-----------------------------------------------------------------------------
        // Add: add a non-standard argument to the table of non-standard arguments
        //
        // Arguments:
        //    node - a GenTree node that has a non-standard argument.
        //    reg - the register to assign to this node.
        //
        // Return Value:
        //    None.
        //
        void Add(GenTree* node, regNumber reg)
        {
            NonStandardArg nsa = {reg, node};
            args.Push(nsa);
        }

        //-----------------------------------------------------------------------------
        // Find: Look for a GenTree* in the set of non-standard args.
        //
        // Arguments:
        //    node - a GenTree node to look for
        //
        // Return Value:
        //    The index of the non-standard argument (a non-negative, unique, stable number).
        //    If the node is not a non-standard argument, return -1.
        //
        int Find(GenTree* node)
        {
            for (int i = 0; i < args.Height(); i++)
            {
                if (node == args.Index(i).node)
                {
                    return i;
                }
            }
            return -1;
        }

        //-----------------------------------------------------------------------------
        // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
        // set the register to use for the node.
        //
        // Arguments:
        //    node - a GenTree node to look for
        //    pReg - an OUT argument. *pReg is set to the non-standard register to use if
        //           'node' is found in the non-standard argument set.
        //
        // Return Value:
        //    'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
        //          register to use.
        //    'false' otherwise (in this case, *pReg is unmodified).
        //
        bool FindReg(GenTree* node, regNumber* pReg)
        {
            for (int i = 0; i < args.Height(); i++)
            {
                NonStandardArg& nsa = args.IndexRef(i);
                if (node == nsa.node)
                {
                    *pReg = nsa.reg;
                    return true;
                }
            }
            return false;
        }

        //-----------------------------------------------------------------------------
        // Replace: Replace the non-standard argument node at a given index. This is done when
        // the original node was replaced via morphing, but we need to continue to assign a
        // particular non-standard arg to it.
        //
        // Arguments:
        //    index - the index of the non-standard arg. It must exist.
        //    node - the new GenTree node.
        //
        // Return Value:
        //    None.
        //
        void Replace(int index, GenTree* node)
        {
            args.IndexRef(index).node = node;
        }

    } nonStandardArgs(this);
#endif // !LEGACY_BACKEND

    // Count of args. On first morph, this is counted before we've filled in the arg table.
    // On remorph, we grab it from the arg table.
    unsigned numArgs = 0;

    // Process the late arguments (which were determined by a previous caller).
    // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
    // may need to refer to it.
    if (reMorphing)
    {
        // We need to reMorph the gtCallLateArgs early since that is what triggers
        // the expression folding and we need to have the final folded gtCallLateArgs
        // available when we call RemorphRegArg so that we correctly update the fgArgInfo
        // with the folded tree that represents the final optimized argument nodes.
        //
        // However if a range-check needs to be generated for any of these late
        // arguments we also need to "know" what the stack depth will be when we generate
        // code to branch to the throw range check failure block as that is part of the
        // GC information contract for that block.
        //
        // Since the late arguments are evaluated last we have pushed all of the
        // other arguments on the stack before we evaluate these late arguments,
        // so we record the stack depth on the first morph call when reMorphing
        // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
        //
        if (call->gtCallLateArgs != nullptr)
        {
            unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
            fgPtrArgCntCur += callStkLevel;
            call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
            flagsSummary |= call->gtCallLateArgs->gtFlags;
            fgPtrArgCntCur -= callStkLevel;
        }
        assert(call->fgArgInfo != nullptr);
        call->fgArgInfo->RemorphReset();

        numArgs = call->fgArgInfo->ArgCount();
    }
    else
    {
        // First we need to count the args
        if (call->gtCallObjp)
        {
            numArgs++;
        }
        for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
        {
            numArgs++;
        }

        // Insert or mark non-standard args. These are either outside the normal calling convention, or
        // argument registers that don't follow the normal progression of argument registers in the calling
        // convention (such as for the ARM64 fixed return buffer argument x8).
        //
        // *********** NOTE *************
        // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
        // in the implementation of fast tail call.
        // *********** END NOTE *********
        CLANG_FORMAT_COMMENT_ANCHOR;

#if !defined(LEGACY_BACKEND)
#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
        // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
        // Set the argument registers correctly here.
        if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
        {
            GenTreeArgList* args = call->gtCallArgs;
            GenTree*        arg1 = args->Current();
            assert(arg1 != nullptr);
            nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
        }
#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
#if defined(_TARGET_X86_)
        // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
        // hi part to be in EDX. This sets the argument registers up correctly.
        else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
                 call->IsHelperCall(this, CORINFO_HELP_LRSZ))
        {
            GenTreeArgList* args = call->gtCallArgs;
            GenTree*        arg1 = args->Current();
            assert(arg1 != nullptr);
            nonStandardArgs.Add(arg1, REG_LNGARG_LO);

            args          = args->Rest();
            GenTree* arg2 = args->Current();
            assert(arg2 != nullptr);
            nonStandardArgs.Add(arg2, REG_LNGARG_HI);
        }
#else  // !defined(_TARGET_X86_)
        // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
        // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
        // convention for x86/SSE.

        // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
        //
        if (hasFixedRetBuffReg() && call->HasRetBufArg())
        {
            args = call->gtCallArgs;
            assert(args != nullptr);
            assert(args->OperIsList());

            argx = call->gtCallArgs->Current();

            // We don't increment numArgs here, since we already counted this argument above.

            nonStandardArgs.Add(argx, theFixedRetBuffReg());
        }

        // We are allowed to have a Fixed Return Buffer argument combined
        // with any of the remaining non-standard arguments
        //
        if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
        {
            assert(!call->gtCallCookie);
            // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
            // It will be used only on the intercepted-for-host code path to copy the arguments.

            GenTree* cns     = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
            call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
            numArgs++;

            nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
        }
        else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
        {
            // indirect VSD stubs need the base of the indirection cell to be
            // passed in addition.  At this point that is the value in gtCallAddr.
            // The actual call target will be derived from gtCallAddr in call
            // lowering.

            // If it is a VSD call getting dispatched via tail call helper,
            // fgMorphTailCall() would materialize stub addr as an additional
            // parameter added to the original arg list and hence no need to
            // add as a non-standard arg.

            GenTree* arg = call->gtCallAddr;
            if (arg->OperIsLocal())
            {
                arg = gtClone(arg, true);
            }
            else
            {
                call->gtCallAddr = fgInsertCommaFormTemp(&arg);
                call->gtFlags |= GTF_ASG;
            }
            noway_assert(arg != nullptr);

            // And push the stub address onto the list of arguments
            call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
            numArgs++;

            nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
        }
        else
#endif // defined(_TARGET_X86_)
        if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
        {
            assert(!call->IsUnmanaged());

            GenTree* arg = call->gtCallCookie;
            noway_assert(arg != nullptr);
            call->gtCallCookie = nullptr;

#if defined(_TARGET_X86_)
            // x86 passes the cookie on the stack as the final argument to the call.
            GenTreeArgList** insertionPoint = &call->gtCallArgs;
            for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
            {
            }
            *insertionPoint = gtNewListNode(arg, nullptr);
#else  // !defined(_TARGET_X86_)
            // All other architectures pass the cookie in a register.
            call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
#endif // defined(_TARGET_X86_)

            nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
            numArgs++;

            // put destination into R10/EAX
            arg              = gtClone(call->gtCallAddr, true);
            call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
            numArgs++;

            nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);

            // finally change this call to a helper call
            call->gtCallType    = CT_HELPER;
            call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
        }
#endif // !defined(LEGACY_BACKEND)

        // Allocate the fgArgInfo for the call node;
        //
        call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
    }

    if (varTypeIsStruct(call))
    {
        fgFixupStructReturn(call);
    }

    /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
     * During the first call to fgMorphArgs we also record the
     * information about late arguments we have in 'fgArgInfo'.
     * This information is used later to construct the gtCallLateArgs */

    /* Process the 'this' argument value, if present */

    argx = call->gtCallObjp;

    if (argx)
    {
        argx             = fgMorphTree(argx);
        call->gtCallObjp = argx;
        flagsSummary |= argx->gtFlags;

        assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);

        assert(argIndex == 0);

        /* We must fill in or update the argInfo table */

        if (reMorphing)
        {
            /* this is a register argument - possibly update it in the table */
            call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
        }
        else
        {
            assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));

            /* this is a register argument - put it in the table */
            call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                                       ,
                                       false, REG_STK, nullptr
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
                                       );
        }
        // this can't be a struct.
        assert(argx->gtType != TYP_STRUCT);

        /* Increment the argument register count and argument index */
        if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
        {
            intArgRegNum++;
#ifdef WINDOWS_AMD64_ABI
            // Whenever we pass an integer register argument
            // we skip the corresponding floating point register argument
            fltArgRegNum++;
#endif // WINDOWS_AMD64_ABI
        }
        else
        {
            noway_assert(!"the 'this' pointer can not be a floating point type");
        }
        argIndex++;
        argSlots++;
    }

#ifdef _TARGET_X86_
    // Compute the maximum number of arguments that can be passed in registers.
    // For X86 we handle the varargs and unmanaged calling conventions

    if (call->gtFlags & GTF_CALL_POP_ARGS)
    {
        noway_assert(intArgRegNum < MAX_REG_ARG);
        // No more register arguments for varargs (CALL_POP_ARGS)
        maxRegArgs = intArgRegNum;

        // Add in the ret buff arg
        if (callHasRetBuffArg)
            maxRegArgs++;
    }

    if (call->IsUnmanaged())
    {
        noway_assert(intArgRegNum == 0);

        if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
        {
            noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
                         call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
                         call->gtCallArgs->gtOp.gtOp1->gtOper ==
                             GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
            maxRegArgs = 1;
        }
        else
        {
            maxRegArgs = 0;
        }

        // Add in the ret buff arg
        if (callHasRetBuffArg)
            maxRegArgs++;
    }
#endif // _TARGET_X86_

    /* Morph the user arguments */
    CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(_TARGET_ARM_)

    // The ARM ABI has a concept of back-filling of floating-point argument registers, according
    // to the "Procedure Call Standard for the ARM Architecture" document, especially
    // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
    // appear in a lower-numbered register than floating point argument N. That is, argument
    // register allocation is not strictly increasing. To support this, we need to keep track of unused
    // floating-point argument registers that we can back-fill. We only support 4-byte float and
    // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
    // only back-fill single registers, since there is no way with these types to create
    // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
    // available (with 16 FP argument registers). Consider this code:
    //
    // struct HFA { float x, y, z; }; // a three element HFA
    // void bar(float a1,   // passed in f0
    //          double a2,  // passed in f2/f3; skip f1 for alignment
    //          HFA a3,     // passed in f4/f5/f6
    //          double a4,  // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
    //          HFA a5,     // passed in f10/f11/f12
    //          double a6,  // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
    //                      // slots
    //          float a7,   // passed in f1 (back-filled)
    //          float a8,   // passed in f7 (back-filled)
    //          float a9,   // passed in f13 (back-filled)
    //          float a10)  // passed on the stack in [OutArg+0]
    //
    // Note that if we ever support FP types with larger alignment requirements, then there could
    // be more than single register back-fills.
    //
    // Once a floating-point argument is assigned to the stack, all later ones must be on the stack too.
    // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
    // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
    // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
    // and prevent any additional floating-point arguments from going in registers.

    bool anyFloatStackArgs = false;

#endif // _TARGET_ARM_

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

    bool hasStructArgument     = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
    bool hasMultiregStructArgs = false;
    for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
    {
        GenTreePtr* parentArgx = &args->gtOp.gtOp1;

#if FEATURE_MULTIREG_ARGS
        if (!hasStructArgument)
        {
            hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
        }
#endif // FEATURE_MULTIREG_ARGS

#ifndef LEGACY_BACKEND
        // Record the index of any nonStandard arg that we may be processing here, as we are
        // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
        GenTreePtr orig_argx         = *parentArgx;
        int        nonStandard_index = nonStandardArgs.Find(orig_argx);
#endif // !LEGACY_BACKEND

        argx        = fgMorphTree(*parentArgx);
        *parentArgx = argx;
        flagsSummary |= argx->gtFlags;

        assert(args->OperIsList());
        assert(argx == args->Current());

#ifndef LEGACY_BACKEND
        if ((nonStandard_index != -1) && (argx != orig_argx))
        {
            // We need to update the node field for this nonStandard arg here
            // as it was changed by the call to fgMorphTree
            nonStandardArgs.Replace(nonStandard_index, argx);
        }
#endif // !LEGACY_BACKEND

        /* Change the node to TYP_I_IMPL so we don't report GC info
         * NOTE: We deferred this from the importer because of the inliner */

        if (argx->IsVarAddr())
        {
            argx->gtType = TYP_I_IMPL;
        }

        bool     passUsingFloatRegs;
        unsigned argAlign = 1;
        // Setup any HFA information about 'argx'
        var_types hfaType  = GetHfaType(argx);
        bool      isHfaArg = varTypeIsFloating(hfaType);
        unsigned  hfaSlots = 0;

        if (isHfaArg)
        {
            hfaSlots = GetHfaCount(argx);

            // If we have a HFA struct it's possible we transition from a method that originally
            // only had integer types to now start having FP types.  We have to communicate this
            // through this flag since LSRA later on will use this flag to determine whether
            // or not to track the FP register set.
            //
            compFloatingPointUsed = true;
        }

        unsigned             size          = 0;
        CORINFO_CLASS_HANDLE copyBlkClass  = nullptr;
        bool                 isRegArg      = false;
        bool                 isNonStandard = false;
        regNumber            nonStdRegNum  = REG_NA;

        fgArgTabEntryPtr argEntry = nullptr;

        if (reMorphing)
        {
            argEntry = gtArgEntryByArgNum(call, argIndex);
        }

#ifdef _TARGET_ARM_

        bool passUsingIntRegs;
        if (reMorphing)
        {
            passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
            passUsingIntRegs   = isValidIntArgReg(argEntry->regNum);
        }
        else
        {
            passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
            passUsingIntRegs   = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
        }

        GenTreePtr curArg = argx;
        // If late args have already been computed, use the node in the argument table.
        if (argEntry != NULL && argEntry->isTmp)
        {
            curArg = argEntry->node;
        }

        if (reMorphing)
        {
            argAlign = argEntry->alignment;
        }
        else
        {
            // We don't use the "size" return value from InferOpSizeAlign().
            codeGen->InferOpSizeAlign(curArg, &argAlign);

            argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
            argAlign /= TARGET_POINTER_SIZE;
        }

        if (argAlign == 2)
        {
            if (passUsingFloatRegs)
            {
                if (fltArgRegNum % 2 == 1)
                {
                    fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
                    fltArgRegNum++;
                }
            }
            else if (passUsingIntRegs)
            {
                if (intArgRegNum % 2 == 1)
                {
                    argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
                    intArgRegNum++;
                }
            }

            if (argSlots % 2 == 1)
            {
                argSlots++;
            }
        }

#elif defined(_TARGET_ARM64_)

        if (reMorphing)
        {
            passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
        }
        else
        {
            passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
        }

#elif defined(_TARGET_AMD64_)
        if (reMorphing)
        {
            passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
        }
        else
        {
            passUsingFloatRegs = varTypeIsFloating(argx);
        }
#elif defined(_TARGET_X86_)

        passUsingFloatRegs = false;

#else
#error Unsupported or unset target architecture
#endif // _TARGET_*

        bool      isBackFilled     = false;
        unsigned  nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
        var_types structBaseType   = TYP_STRUCT;
        unsigned  structSize       = 0;

        bool isStructArg = varTypeIsStruct(argx);

        if (reMorphing)
        {
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
            // Get the struct description for the already completed struct argument.
            fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
            assert(fgEntryPtr != nullptr);

            // As described in few other places, this can happen when the argx was morphed
            // into an arg setup node - COPYBLK. The COPYBLK has always a type of void.
            // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
            // was a struct and the struct classification.
            isStructArg = fgEntryPtr->isStruct;

            if (isStructArg)
            {
                structDesc.CopyFrom(fgEntryPtr->structDesc);
            }
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

            assert(argEntry != nullptr);
            if (argEntry->IsBackFilled())
            {
                isRegArg         = true;
                size             = argEntry->numRegs;
                nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
                assert(size == 1);
                isBackFilled = true;
            }
            else if (argEntry->regNum == REG_STK)
            {
                isRegArg = false;
                assert(argEntry->numRegs == 0);
                size = argEntry->numSlots;
            }
            else
            {
                isRegArg = true;
                assert(argEntry->numRegs > 0);
                size = argEntry->numRegs + argEntry->numSlots;
            }

            // This size has now been computed
            assert(size != 0);
        }
        else // !reMorphing
        {
            //
            // Figure out the size of the argument. This is either in number of registers, or number of
            // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
            // the stack.
            //
            if (argx->IsArgPlaceHolderNode() || (!isStructArg))
            {
#if defined(_TARGET_AMD64_)
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                if (!isStructArg)
                {
                    size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
                }
                else
                {
                    size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
                                              TARGET_POINTER_SIZE)) /
                           TARGET_POINTER_SIZE;
                    eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
                    if (size > 1)
                    {
                        hasMultiregStructArgs = true;
                    }
                }
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
                size         = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
                if (isStructArg)
                {
                    if (isHfaArg)
                    {
                        size = GetHfaCount(argx);
                        // HFA structs are passed by value in multiple registers
                        hasMultiregStructArgs = true;
                    }
                    else
                    {
                        // Structs are either passed in 1 or 2 (64-bit) slots
                        size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
                                                  TARGET_POINTER_SIZE)) /
                               TARGET_POINTER_SIZE;

                        if (size == 2)
                        {
                            // Structs that are the size of 2 pointers are passed by value in multiple registers
                            hasMultiregStructArgs = true;
                        }
                        else if (size > 2)
                        {
                            size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by
                                      // reference (to a copy)
                        }
                    }
                    // Note that there are some additional rules for multireg structs.
                    // (i.e they cannot be split between registers and the stack)
                }
                else
                {
                    size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
                }
#elif defined(_TARGET_ARM_)
                if (isStructArg)
                {
                    size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
                                              TARGET_POINTER_SIZE)) /
                           TARGET_POINTER_SIZE;
                    if (isHfaArg)
                    {
                        hasMultiregStructArgs = true;
                    }
                    else if (size > 1 && size <= 4)
                    {
                        hasMultiregStructArgs = true;
                    }
                }
                else
                {
                    // The typical case
                    // long/double type argument(s) will be changed to GT_FIELD_LIST in lower phase
                    size = genTypeStSz(argx->gtType);
                }
#elif defined(_TARGET_X86_)
                size       = genTypeStSz(argx->gtType);
#else
#error Unsupported or unset target architecture
#endif // _TARGET_XXX_
            }
#ifdef _TARGET_ARM_
            else if (isHfaArg)
            {
                size                  = GetHfaCount(argx);
                hasMultiregStructArgs = true;
            }
#endif           // _TARGET_ARM_
            else // struct type
            {
                // We handle two opcodes: GT_MKREFANY and GT_OBJ
                if (argx->gtOper == GT_MKREFANY)
                {
                    if (varTypeIsStruct(argx))
                    {
                        isStructArg = true;
                    }
#ifdef _TARGET_AMD64_
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                    if (varTypeIsStruct(argx))
                    {
                        size                 = info.compCompHnd->getClassSize(impGetRefAnyClass());
                        unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
                        size                 = roundupSize / TARGET_POINTER_SIZE;
                        eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
                    }
                    else
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                    {
                        size = 1;
                    }
#else
                    size                 = 2;
#endif
                }
                else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
                {
                    GenTreePtr  argObj         = argx;
                    GenTreePtr* parentOfArgObj = parentArgx;

                    assert(args->OperIsList());
                    assert(argx == args->Current());

                    /* The GT_OBJ may be a child of a GT_COMMA */
                    while (argObj->gtOper == GT_COMMA)
                    {
                        parentOfArgObj = &argObj->gtOp.gtOp2;
                        argObj         = argObj->gtOp.gtOp2;
                    }

                    // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
                    if (argObj->gtOper != GT_OBJ)
                    {
                        BADCODE("illegal argument tree in fgMorphArgs");
                    }

                    CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                    eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

                    unsigned originalSize = info.compCompHnd->getClassSize(objClass);
                    originalSize          = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
                    unsigned roundupSize  = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);

                    structSize = originalSize;

                    structPassingKind howToPassStruct;
                    structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);

#ifdef _TARGET_ARM64_
                    if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
                        !isPow2(originalSize))                    // size is 3,5,6 or 7 bytes
                    {
                        if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
                        {
                            // For ARM64 we pass structs that are 3,5,6,7 bytes in size
                            // we can read 4 or 8 bytes from the LclVar to pass this arg
                            originalSize = genTypeSize(structBaseType);
                        }
                    }
#endif //  _TARGET_ARM64_

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                    // On System V OS-es a struct is never passed by reference.
                    // It is either passed by value on the stack or in registers.
                    bool passStructInRegisters = false;
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
                    bool passStructByRef = false;
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

                    // The following if-then-else needs to be carefully refactored.
                    // Basically the else portion wants to turn a struct load (a GT_OBJ)
                    // into a GT_IND of the appropriate size.
                    // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
                    // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined  (Why?)
                    // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
                    // It also can't do this if we have a HFA arg,
                    // unless we have a 1-elem HFA in which case we want to do the optimization.
                    CLANG_FORMAT_COMMENT_ANCHOR;

#ifndef _TARGET_X86_
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
                    // Check for struct argument with size 1, 2, 4 or 8 bytes
                    // As we can optimize these by turning them into a GT_IND of the correct type
                    //
                    // Check for cases that we cannot optimize:
                    //
                    if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
                        !isPow2(originalSize) ||                // it is not a power of two (1, 2, 4 or 8)
                        (isHfaArg && (hfaSlots != 1)))          // it is an HFA struct with more than one element
#endif                                                          // FEATURE_UNIX_AMD64_STRUCT_PASSING
                    {
                        // Normalize 'size' to the number of pointer sized items
                        // 'size' is the number of register slots that we will use to pass the argument
                        size = roundupSize / TARGET_POINTER_SIZE;
#if defined(_TARGET_AMD64_)
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
                        size            = 1; // This must be copied to a temp and passed by address
                        passStructByRef = true;
                        copyBlkClass    = objClass;
#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
                        if (!structDesc.passedInRegisters)
                        {
                            GenTreePtr lclVar     = fgIsIndirOfAddrOfLocal(argObj);
                            bool       needCpyBlk = false;
                            if (lclVar != nullptr)
                            {
                                // If the struct is promoted to registers, it has to be materialized
                                // on stack. We may want to support promoted structures in
                                // the codegen for putarg_stk instead of creating a copy here.
                                LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
                                needCpyBlk        = varDsc->lvPromoted;
                            }
                            else
                            {
                                // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
                                // sets structDesc.passedInRegisters to be false.
                                //
                                // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
                                // by rationalizer. For now we will let SIMD struct arg to be copied to
                                // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
                                //
                                // +--*  obj       simd16
                                // |  \--*  addr      byref
                                // |     |  /--*  lclVar    simd16 V05 loc4
                                // |     \--*  simd      simd16 int -
                                // |        \--*  lclVar    simd16 V08 tmp1
                                //
                                // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
                                // so that we don't need to generate a copy here.
                                GenTree* addr = argObj->gtOp.gtOp1;
                                if (addr->OperGet() == GT_ADDR)
                                {
                                    GenTree* addrChild = addr->gtOp.gtOp1;
                                    if (addrChild->OperGet() == GT_SIMD)
                                    {
                                        needCpyBlk = true;
                                    }
                                }
                            }
                            passStructInRegisters = false;
                            if (needCpyBlk)
                            {
                                copyBlkClass = objClass;
                            }
                            else
                            {
                                copyBlkClass = NO_CLASS_HANDLE;
                            }
                        }
                        else
                        {
                            // The objClass is used to materialize the struct on stack.
                            // For SystemV, the code below generates copies for struct arguments classified
                            // as register argument.
                            // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
                            // can be passed on registers or can be copied directly to outgoing area.
                            passStructInRegisters = true;
                            copyBlkClass          = objClass;
                        }

#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
                        if ((size > 2) && !isHfaArg)
                        {
                            size            = 1; // This must be copied to a temp and passed by address
                            passStructByRef = true;
                            copyBlkClass    = objClass;
                        }
#endif

#ifdef _TARGET_ARM_
                        // If we're passing a promoted struct local var,
                        // we may need to skip some registers due to alignment; record those.
                        GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
                        if (lclVar != NULL)
                        {
                            LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
                            if (varDsc->lvPromoted)
                            {
                                assert(argObj->OperGet() == GT_OBJ);
                                if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
                                {
                                    fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
                                }
                            }
                        }
#endif // _TARGET_ARM_
                    }
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
                    // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
                    // generated for struct 1, 2, 4, or 8.
                    else // We have a struct argument with size 1, 2, 4 or 8 bytes
                    {
                        // change our GT_OBJ into a GT_IND of the correct type.
                        // We've already ensured above that size is a power of 2, and less than or equal to pointer
                        // size.

                        assert(howToPassStruct == SPK_PrimitiveType);

                        // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
                        // primitives
                        if (isHfaArg)
                        {
                            // If we reach here with an HFA arg it has to be a one element HFA
                            assert(hfaSlots == 1);
                            structBaseType = hfaType; // change the indirection type to a floating point type
                        }

                        noway_assert(structBaseType != TYP_UNKNOWN);

                        argObj->ChangeOper(GT_IND);

                        // Now see if we can fold *(&X) into X
                        if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
                        {
                            GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;

                            // Keep the DONT_CSE flag in sync
                            // (as the addr always marks it for its op1)
                            temp->gtFlags &= ~GTF_DONT_CSE;
                            temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
                            DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
                            DEBUG_DESTROY_NODE(argObj);             // GT_IND

                            argObj          = temp;
                            *parentOfArgObj = temp;

                            // If the OBJ had been the top level node, we've now changed argx.
                            if (parentOfArgObj == parentArgx)
                            {
                                argx = temp;
                            }
                        }
                        if (argObj->gtOper == GT_LCL_VAR)
                        {
                            unsigned   lclNum = argObj->gtLclVarCommon.gtLclNum;
                            LclVarDsc* varDsc = &lvaTable[lclNum];

                            if (varDsc->lvPromoted)
                            {
                                if (varDsc->lvFieldCnt == 1)
                                {
                                    // get the first and only promoted field
                                    LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
                                    if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
                                    {
                                        // we will use the first and only promoted field
                                        argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);

                                        if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
                                            (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
                                        {
                                            // Just use the existing field's type
                                            argObj->gtType = fieldVarDsc->TypeGet();
                                        }
                                        else
                                        {
                                            // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
                                            // to a new type
                                            argObj->ChangeOper(GT_LCL_FLD);
                                            argObj->gtType = structBaseType;
                                        }
                                        assert(varTypeCanReg(argObj->TypeGet()));
                                        assert(copyBlkClass == NO_CLASS_HANDLE);
                                    }
                                    else
                                    {
                                        // use GT_LCL_FLD to swizzle the single field struct to a new type
                                        lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
                                        argObj->ChangeOper(GT_LCL_FLD);
                                        argObj->gtType = structBaseType;
                                    }
                                }
                                else
                                {
                                    // The struct fits into a single register, but it has been promoted into its
                                    // constituent fields, and so we have to re-assemble it
                                    copyBlkClass = objClass;
#ifdef _TARGET_ARM_
                                    // Alignment constraints may cause us not to use (to "skip") some argument
                                    // registers. Add those, if any, to the skipped (int) arg reg mask.
                                    fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
#endif // _TARGET_ARM_
                                }
                            }
                            else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
                            {
                                // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
                                argObj->ChangeOper(GT_LCL_FLD);
                                argObj->gtType = structBaseType;
                            }
                        }
                        else
                        {
                            // Not a GT_LCL_VAR, so we can just change the type on the node
                            argObj->gtType = structBaseType;
                        }
                        assert(varTypeCanReg(argObj->TypeGet()) ||
                               ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));

                        size = 1;
                    }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

#endif // not _TARGET_X86_
                    // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
                    if ((structBaseType == TYP_STRUCT) &&
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                        !passStructInRegisters
#else  // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                        !passStructByRef
#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                        )
                    {
                        if (isHfaArg && passUsingFloatRegs)
                        {
                            size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
                        }
                        else
                        {
                            // If the valuetype size is not a multiple of sizeof(void*),
                            // we must copyblk to a temp before doing the obj to avoid
                            // the obj reading memory past the end of the valuetype
                            CLANG_FORMAT_COMMENT_ANCHOR;

                            if (roundupSize > originalSize)
                            {
                                copyBlkClass = objClass;

                                // There are a few special cases where we can omit using a CopyBlk
                                // where we normally would need to use one.

                                if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
                                {
                                    copyBlkClass = NO_CLASS_HANDLE;
                                }
                            }

                            size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                        }
                    }
                }

#ifdef _TARGET_64BIT_
                if (size > 1)
                {
                    hasMultiregStructArgs = true;
                }
#elif defined(_TARGET_ARM_)
                // TODO-Arm: Need to handle the case
                // where structs passed by value can be split between registers and stack.
                if (size > 1 && size <= 4)
                {
                    hasMultiregStructArgs = true;
                }
#ifndef LEGACY_BACKEND
                else if (size > 4 && passUsingIntRegs)
                {
                    NYI_ARM("Struct can be split between registers and stack");
                }
#endif // !LEGACY_BACKEND
#endif // _TARGET_ARM_
            }

            // The 'size' value must have been set by now. (the original value of zero is an invalid value)
            assert(size != 0);

            //
            // Figure out if the argument will be passed in a register.
            //

            if (isRegParamType(genActualType(argx->TypeGet()))
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                && (!isStructArg || structDesc.passedInRegisters)
#endif
                    )
            {
#ifdef _TARGET_ARM_
                if (passUsingFloatRegs)
                {
                    // First, see if it can be back-filled
                    if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
                        (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
                        (size == 1))                          // The size to back-fill is one float register
                    {
                        // Back-fill the register.
                        isBackFilled              = true;
                        regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
                        fltArgSkippedRegMask &=
                            ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
                        nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
                        assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
                    }

                    // Does the entire float, double, or HFA fit in the FP arg registers?
                    // Check if the last register needed is still in the argument register range.
                    isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;

                    if (!isRegArg)
                    {
                        anyFloatStackArgs = true;
                    }
                }
                else
                {
                    isRegArg = intArgRegNum < MAX_REG_ARG;
                }
#elif defined(_TARGET_ARM64_)
                if (passUsingFloatRegs)
                {
                    // Check if the last register needed is still in the fp argument register range.
                    isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;

                    // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
                    if (isHfaArg && !isRegArg)
                    {
                        // recompute the 'size' so that it represent the number of stack slots rather than the number of
                        // registers
                        //
                        unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
                        size                 = roundupSize / TARGET_POINTER_SIZE;

                        // We also must update fltArgRegNum so that we no longer try to
                        // allocate any new floating point registers for args
                        // This prevents us from backfilling a subsequent arg into d7
                        //
                        fltArgRegNum = MAX_FLOAT_REG_ARG;
                    }
                }
                else
                {
                    // Check if the last register needed is still in the int argument register range.
                    isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;

                    // Did we run out of registers when we had a 16-byte struct (size == 2)?
                    // (i.e we only have one register remaining but we needed two registers to pass this arg)
                    // This prevents us from backfilling a subsequent arg into x7
                    //
                    if (!isRegArg && (size > 1))
                    {
                        // We also must update intArgRegNum so that we no longer try to
                        // allocate any new general purpose registers for args
                        //
                        intArgRegNum = maxRegArgs;
                    }
                }
#else // not _TARGET_ARM_ or _TARGET_ARM64_

#if defined(UNIX_AMD64_ABI)

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                // Here a struct can be passed in register following the classifications of its members and size.
                // Now make sure there are actually enough registers to do so.
                if (isStructArg)
                {
                    unsigned int structFloatRegs = 0;
                    unsigned int structIntRegs   = 0;
                    for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
                    {
                        if (structDesc.IsIntegralSlot(i))
                        {
                            structIntRegs++;
                        }
                        else if (structDesc.IsSseSlot(i))
                        {
                            structFloatRegs++;
                        }
                    }

                    isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
                               ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
                }
                else
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                {
                    if (passUsingFloatRegs)
                    {
                        isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
                    }
                    else
                    {
                        isRegArg = intArgRegNum < MAX_REG_ARG;
                    }
                }
#else  // !defined(UNIX_AMD64_ABI)
                isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
#endif // !defined(UNIX_AMD64_ABI)
#endif // _TARGET_ARM_
            }
            else
            {
                isRegArg = false;
            }

#ifndef LEGACY_BACKEND
            // If there are nonstandard args (outside the calling convention) they were inserted above
            // and noted them in a table so we can recognize them here and build their argInfo.
            //
            // They should not affect the placement of any other args or stack space required.
            // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
            isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
            if (isNonStandard && (nonStdRegNum == REG_STK))
            {
                isRegArg = false;
            }
#if defined(_TARGET_X86_)
            else if (call->IsTailCallViaHelper())
            {
                // We have already (before calling fgMorphArgs()) appended the 4 special args
                // required by the x86 tailcall helper. These args are required to go on the
                // stack. Force them to the stack here.
                assert(numArgs >= 4);
                if (argIndex >= numArgs - 4)
                {
                    isRegArg = false;
                }
            }
#endif    // defined(_TARGET_X86_)
#endif    // !LEGACY_BACKEND
        } // end !reMorphing

        //
        // Now we know if the argument goes in registers or not and how big it is,
        // whether we had to just compute it or this is a re-morph call and we looked it up.
        //
        CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
        // If we ever allocate a floating point argument to the stack, then all
        // subsequent HFA/float/double arguments go on the stack.
        if (!isRegArg && passUsingFloatRegs)
        {
            for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
            {
                fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
            }
        }

        // If we think we're going to split a struct between integer registers and the stack, check to
        // see if we've already assigned a floating-point arg to the stack.
        if (isRegArg &&                            // We decided above to use a register for the argument
            !passUsingFloatRegs &&                 // We're using integer registers
            (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
            anyFloatStackArgs)                     // We've already used the stack for a floating-point argument
        {
            isRegArg = false; // Change our mind; don't pass this struct partially in registers

            // Skip the rest of the integer argument registers
            for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
            {
                argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
            }
        }

#endif // _TARGET_ARM_

        if (isRegArg)
        {
            regNumber nextRegNum = REG_STK;
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
            regNumber    nextOtherRegNum = REG_STK;
            unsigned int structFloatRegs = 0;
            unsigned int structIntRegs   = 0;

            if (isStructArg && structDesc.passedInRegisters)
            {
                // It is a struct passed in registers. Assign the next available register.
                assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
                regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
                for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
                {
                    if (structDesc.IsIntegralSlot(i))
                    {
                        *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
                        structIntRegs++;
                    }
                    else if (structDesc.IsSseSlot(i))
                    {
                        *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
                        structFloatRegs++;
                    }
                }
            }
            else
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
            {
                // fill in or update the argInfo table
                nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
                                                : genMapIntRegArgNumToRegNum(intArgRegNum);
            }

#ifdef _TARGET_AMD64_
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
            assert(size == 1);
#endif
#endif

            fgArgTabEntryPtr newArgEntry;
            if (reMorphing)
            {
                // This is a register argument - possibly update it in the table
                newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
            }
            else
            {
                if (isNonStandard)
                {
                    nextRegNum = nonStdRegNum;
                }

                // This is a register argument - put it in the table
                newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                                                         ,
                                                         isStructArg, nextOtherRegNum, &structDesc
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                                                         );

                newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
                                            isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
                newArgEntry->SetIsBackFilled(isBackFilled);
                newArgEntry->isNonStandard = isNonStandard;
            }

            if (newArgEntry->isNonStandard)
            {
                continue;
            }

            // Set up the next intArgRegNum and fltArgRegNum values.
            if (!isBackFilled)
            {
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                if (isStructArg)
                {
                    intArgRegNum += structIntRegs;
                    fltArgRegNum += structFloatRegs;
                }
                else
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
                {
                    if (passUsingFloatRegs)
                    {
                        fltArgRegNum += size;

#ifdef WINDOWS_AMD64_ABI
                        // Whenever we pass a floating point register argument
                        // we skip the corresponding integer register argument
                        intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
#endif // WINDOWS_AMD64_ABI
#ifdef _TARGET_ARM_
                        if (fltArgRegNum > MAX_FLOAT_REG_ARG)
                        {
#ifndef LEGACY_BACKEND
                            NYI_ARM("Struct split between float registers and stack");
#endif // !LEGACY_BACKEND
                            // This indicates a partial enregistration of a struct type
                            assert(varTypeIsStruct(argx));
                            unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
                            assert((unsigned char)numRegsPartial == numRegsPartial);
                            call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
                            fltArgRegNum = MAX_FLOAT_REG_ARG;
                        }
#endif // _TARGET_ARM_
                    }
                    else
                    {
                        if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
                        {
                            // we are setting up the fixed return buffer register argument
                            // so don't increment intArgRegNum
                            assert(size == 1);
                        }
                        else
                        {
                            // Increment intArgRegNum by 'size' registers
                            intArgRegNum += size;
                        }

#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
                        fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
#endif // _TARGET_AMD64_
#ifdef _TARGET_ARM_
                        if (intArgRegNum > MAX_REG_ARG)
                        {
#ifndef LEGACY_BACKEND
                            NYI_ARM("Struct split between integer registers and stack");
#endif // !LEGACY_BACKEND
                            // This indicates a partial enregistration of a struct type
                            assert((isStructArg) || argx->OperIsCopyBlkOp() ||
                                   (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
                            unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
                            assert((unsigned char)numRegsPartial == numRegsPartial);
                            call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
                            intArgRegNum = MAX_REG_ARG;
                            fgPtrArgCntCur += size - numRegsPartial;
                        }
#endif // _TARGET_ARM_
                    }
                }
            }
        }
        else // We have an argument that is not passed in a register
        {
            fgPtrArgCntCur += size;

            // If the register arguments have not been determined then we must fill in the argInfo

            if (reMorphing)
            {
                // This is a stack argument - possibly update it in the table
                call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
            }
            else
            {
                // This is a stack argument - put it in the table
                call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
                                           argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
            }
        }

        if (copyBlkClass != NO_CLASS_HANDLE)
        {
            noway_assert(!reMorphing);
            fgMakeOutgoingStructArgCopy(call, args, argIndex,
                                        copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));

            // This can cause a GTF_EXCEPT flag to be set.
            // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
            // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
            // there are no register arguments. Then reMorphing is never true, so we keep re-copying
            // any struct arguments.
            // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
            flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
            hasStackArgCopy = true;
#endif
        }

#ifndef LEGACY_BACKEND
        if (argx->gtOper == GT_MKREFANY)
        {
            // 'Lower' the MKREFANY tree and insert it.
            noway_assert(!reMorphing);

#ifndef _TARGET_64BIT_

            // Build the mkrefany as a GT_FIELD_LIST
            GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
                GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
            (void)new (this, GT_FIELD_LIST)
                GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
            fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
            fp->node            = fieldList;
            args->gtOp.gtOp1    = fieldList;

#else  // _TARGET_64BIT_

            // Get a new temp
            // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
            unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
            lvaSetStruct(tmp, impGetRefAnyClass(), false);

            // Build the mkrefany as a comma node:
            // (tmp.ptr=argx),(tmp.type=handle)
            GenTreeLclFld* destPtrSlot  = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
            GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
            destPtrSlot->gtFieldSeq     = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
            destPtrSlot->gtFlags |= GTF_VAR_DEF;
            destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
            destTypeSlot->gtFlags |= GTF_VAR_DEF;

            GenTreePtr asgPtrSlot  = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
            GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
            GenTreePtr asg         = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);

            // Change the expression to "(tmp=val)"
            args->gtOp.gtOp1 = asg;

            // EvalArgsToTemps will cause tmp to actually get loaded as the argument
            call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
            lvaSetVarAddrExposed(tmp);
#endif // _TARGET_64BIT_
        }
#endif // !LEGACY_BACKEND

#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
        if (isStructArg)
        {
            GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
            if ((lclNode != nullptr) &&
                (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
            {
                // Make a GT_FIELD_LIST of the field lclVars.
                GenTreeLclVarCommon* lcl       = lclNode->AsLclVarCommon();
                LclVarDsc*           varDsc    = &(lvaTable[lcl->gtLclNum]);
                GenTreeFieldList*    fieldList = nullptr;
                for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
                     fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
                {
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
                    if (fieldList == nullptr)
                    {
                        lcl->SetLclNum(fieldLclNum);
                        lcl->ChangeOper(GT_LCL_VAR);
                        lcl->gtType = fieldVarDsc->lvType;
                        fieldList   = new (this, GT_FIELD_LIST)
                            GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
                        fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
                        fp->node            = fieldList;
                        args->gtOp.gtOp1    = fieldList;
                    }
                    else
                    {
                        GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
                        fieldList         = new (this, GT_FIELD_LIST)
                            GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
                    }
                }
            }
        }
#endif // _TARGET_X86_ && !LEGACY_BACKEND

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
        if (isStructArg && !isRegArg)
        {
            nonRegPassedStructSlots += size;
        }
        else
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
        {
            argSlots += size;
        }
    } // end foreach argument loop

    if (!reMorphing)
    {
        call->fgArgInfo->ArgsComplete();

#ifdef LEGACY_BACKEND
        call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
        call->gtCallRegUsedMask &= ~argSkippedRegMask;
#endif
        if (fltArgRegNum > 0)
        {
#if defined(_TARGET_ARM_)
            call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
#endif
        }
#endif // LEGACY_BACKEND
    }

    if (call->gtCallArgs)
    {
        UpdateGT_LISTFlags(call->gtCallArgs);
    }

    /* Process the function address, if indirect call */

    if (call->gtCallType == CT_INDIRECT)
    {
        call->gtCallAddr = fgMorphTree(call->gtCallAddr);
    }

    call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);

    if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
    {
        fgPtrArgCntCur++;
    }

    /* Remember the maximum value we ever see */

    if (fgPtrArgCntMax < fgPtrArgCntCur)
    {
        JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
        fgPtrArgCntMax = fgPtrArgCntCur;
    }

    assert(fgPtrArgCntCur >= genPtrArgCntSav);
    call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);

    /* The call will pop all the arguments we pushed */

    fgPtrArgCntCur = genPtrArgCntSav;

#if FEATURE_FIXED_OUT_ARGS

    // Record the outgoing argument size.  If the call is a fast tail
    // call, it will setup its arguments in incoming arg area instead
    // of the out-going arg area, so we don't need to track the
    // outgoing arg size.
    if (!call->IsFastTailCall())
    {
        unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();

#if defined(UNIX_AMD64_ABI)
        opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly

        // ToDo: Remove this re-calculation preallocatedArgCount and use the value assigned above.

        // First slots go in registers only, no stack needed.
        // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
        // and ignores floating point args (it is overly conservative in that case).
        preallocatedArgCount = nonRegPassedStructSlots;
        if (argSlots > MAX_REG_ARG)
        {
            preallocatedArgCount += argSlots - MAX_REG_ARG;
        }
#endif // UNIX_AMD64_ABI

        const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
        call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));

#ifdef DEBUG
        if (verbose)
        {
            printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
                   preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
        }
#endif
    }
#endif // FEATURE_FIXED_OUT_ARGS

    /* Update the 'side effect' flags value for the call */

    call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);

    // If the register arguments have already been determined
    // or we have no register arguments then we don't need to
    // call SortArgs() and EvalArgsToTemps()
    //
    // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
    // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
    // is added to make sure to call EvalArgsToTemp.
    if (!reMorphing && (call->fgArgInfo->HasRegArgs()
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
                        || hasStackArgCopy
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
                        ))
    {
        // This is the first time that we morph this call AND it has register arguments.
        // Follow into the code below and do the 'defer or eval to temp' analysis.

        call->fgArgInfo->SortArgs();

        call->fgArgInfo->EvalArgsToTemps();

        // We may have updated the arguments
        if (call->gtCallArgs)
        {
            UpdateGT_LISTFlags(call->gtCallArgs);
        }
    }

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

    // Rewrite the struct args to be passed by value on stack or in registers.
    fgMorphSystemVStructArgs(call, hasStructArgument);

#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING

#ifndef LEGACY_BACKEND
    // In the future we can migrate UNIX_AMD64 to use this
    // method instead of fgMorphSystemVStructArgs

    // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
    if (hasMultiregStructArgs)
    {
        fgMorphMultiregStructArgs(call);
    }
#endif // LEGACY_BACKEND

#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

#ifdef DEBUG
    if (verbose)
    {
        fgArgInfoPtr argInfo = call->fgArgInfo;
        for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
        {
            fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
            curArgEntry->Dump();
        }
    }
#endif

    return call;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// fgMorphSystemVStructArgs:
//   Rewrite the struct args to be passed by value on stack or in registers.
//
// args:
//   call: The call whose arguments need to be morphed.
//   hasStructArgument: Whether this call has struct arguments. When false the
//                      method returns without touching the call.
//
// Notes:
//   Each struct-typed argument that is not already a GT_FIELD_LIST or a GT_OBJ is handled
//   according to its fgArgTabEntry 'structDesc':
//     - passed in registers, one eightbyte:  the node is retyped in place to the type obtained
//       from GetTypeFromClassificationAndSizes for that eightbyte.
//     - passed in registers, two eightbytes: the node is retyped to the first eightbyte and
//       wrapped in a GT_FIELD_LIST; a new GT_LCL_FLD for the second eightbyte is wrapped in a
//       second GT_FIELD_LIST node constructed with the first one as its previous-list argument.
//     - not passed in registers: the node is wrapped in a GT_ADDR (unless it already is an
//       address node) and then in a GT_OBJ of the local's struct class.
//   When a new tree is produced it is recorded in fgArgTabEntry::node and linked into either
//   the gtCallLateArgs list or the gtCallArgs list.
//
void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
{
    if (!hasStructArgument)
    {
        // Nothing to rewrite for this call.
        return;
    }

    for (GenTreePtr args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
    {
        // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
        // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
        // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
        // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
        // otherwise points to the list in the late args list.
        bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
        fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
        assert(fgEntryPtr != nullptr);
        GenTreePtr argx     = fgEntryPtr->node;
        GenTreePtr lateList = nullptr;
        GenTreePtr lateNode = nullptr;

        if (isLateArg)
        {
            // Find the late-arg list node that holds 'argx' so that it can be re-linked below.
            for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
            {
                assert(list->OperIsList());

                GenTreePtr argNode = list->Current();
                if (argx == argNode)
                {
                    lateList = list;
                    lateNode = argNode;
                    break;
                }
            }
            assert(lateList != nullptr && lateNode != nullptr);
        }

        // 'arg' is the (possibly new) tree for this argument; 'argx' remembers the original.
        GenTreePtr arg            = argx;
        bool       argListCreated = false;

        var_types type = arg->TypeGet();

        if (varTypeIsStruct(type))
        {
            var_types originalType = type;
            // If we have already processed the arg...
            if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
            {
                continue;
            }

            // If already OBJ it is set properly already.
            if (arg->OperGet() == GT_OBJ)
            {
                assert(!fgEntryPtr->structDesc.passedInRegisters);
                continue;
            }

            assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
                   (arg->OperGet() == GT_ADDR &&
                    (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));

            GenTreeLclVarCommon* lclCommon =
                arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
            if (fgEntryPtr->structDesc.passedInRegisters)
            {
                if (fgEntryPtr->structDesc.eightByteCount == 1)
                {
                    // Change the type and below the code will change the LclVar to a LCL_FLD
                    type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
                                                             fgEntryPtr->structDesc.eightByteSizes[0]);
                }
                else if (fgEntryPtr->structDesc.eightByteCount == 2)
                {
                    // Create LCL_FLD for each eightbyte.
                    argListCreated = true;

                    // First eightbyte.
                    arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
                    arg->gtType =
                        GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
                                                          fgEntryPtr->structDesc.eightByteSizes[0]);
                    GenTreeFieldList* fieldList =
                        new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
                    fieldList->gtType = originalType; // Preserve the type. It is a special case.
                    arg               = fieldList;

                    // Second eightbyte.
                    GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
                        GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
                                                                            .eightByteClassifications[1],
                                                                        fgEntryPtr->structDesc.eightByteSizes[1]),
                                      lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);

                    fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
                    fieldList->gtType       = originalType; // Preserve the type. It is a special case.
                    newLclField->gtFieldSeq = FieldSeqStore::NotAField();
                }
                else
                {
                    assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
                                                                                   // for the CLR.
                }
            }

            // No GT_FIELD_LIST was built, so the struct is either passed in a single register
            // (retype the node in place) or is not passed in registers at all (wrap it in an
            // OBJ of its address).
            if (!argListCreated)
            {
                if (fgEntryPtr->structDesc.passedInRegisters)
                {
                    arg->gtType = type;
                }
                else
                {
                    // Make sure this is an addr node.
                    if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
                    {
                        arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
                    }

                    assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);

                    // Create an Obj of the temp to use it as a call argument.
                    arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
                }
            }
        }

        // Did we replace 'argx' with a new tree?
        if (argx != arg)
        {
            // The arg table entry and the late-arg list node found at the top of this iteration
            // are still current: nothing above modifies the arg table or either argument list.
            fgEntryPtr->node = arg;

            // Link the new arg node into either the late arg list or the gtCallArgs list.
            if (isLateArg)
            {
                lateList->gtOp.gtOp1 = arg;
            }
            else
            {
                args->gtOp.gtOp1 = arg;
            }
        }
    }
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

//-----------------------------------------------------------------------------
// fgMorphMultiregStructArgs:  Locate the TYP_STRUCT arguments and
//                             call fgMorphMultiregStructArg on each of them.
//
// Arguments:
//    call:    a GenTreeCall node that has one or more TYP_STRUCT arguments
//
// Notes:
//    We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
//    The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
//    which is only used for struct arguments.
//    If fgMorphMultiregStructArg returns a different tree, that tree is recorded in the
//    argument's fgArgTabEntry and linked into either the gtCallLateArgs list or the
//    gtCallArgs list.
//    If this method fails to find any TYP_STRUCT arguments it will assert.
//
void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
{
    bool foundStructArg = false;

    // Currently ARM64/ARM is using this method to morph the MultiReg struct args
    //  in the future AMD64_UNIX will also use this method
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_X86_
    assert(!"Logic error: no MultiregStructArgs for X86");
#endif
#ifdef _TARGET_AMD64_
#if defined(UNIX_AMD64_ABI)
    NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
#else  // WINDOWS_AMD64_ABI
    assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
#endif // !UNIX_AMD64_ABI
#endif

    for (GenTreePtr args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
    {
        // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
        // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
        // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
        // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
        // otherwise points to the list in the late args list.
        bool             isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
        fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
        assert(fgEntryPtr != nullptr);
        GenTreePtr argx     = fgEntryPtr->node;
        GenTreePtr lateList = nullptr;
        GenTreePtr lateNode = nullptr;

        if (isLateArg)
        {
            // Find the late-arg list node that holds 'argx' so that it can be re-linked below.
            for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
            {
                assert(list->OperIsList());

                GenTreePtr argNode = list->Current();
                if (argx == argNode)
                {
                    lateList = list;
                    lateNode = argNode;
                    break;
                }
            }
            assert(lateList != nullptr && lateNode != nullptr);
        }

        GenTreePtr arg = argx;

        if (arg->TypeGet() == TYP_STRUCT)
        {
            foundStructArg = true;

            arg = fgMorphMultiregStructArg(arg, fgEntryPtr);

            // Did we replace 'argx' with a new tree?
            if (arg != argx)
            {
                fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node

                // link the new arg node into either the late arg list or the gtCallArgs list
                if (isLateArg)
                {
                    lateList->gtOp.gtOp1 = arg;
                }
                else
                {
                    args->gtOp.gtOp1 = arg;
                }
            }
        }
    }

    // We should only call this method when we actually have one or more multireg struct args
    assert(foundStructArg);
}

//-----------------------------------------------------------------------------
// fgMorphMultiregStructArg:  Given a multireg TYP_STRUCT arg from a call argument list
//   Morph the argument into a set of GT_FIELD_LIST nodes.
//
// Arguments:
//     arg        - A GenTree node containing a TYP_STRUCT arg that
//                  is to be passed in multiple registers
//     fgEntryPtr - the fgArgTabEntry information for the current 'arg'
//
// Notes:
//    arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
//    for passing in multiple registers.
//    If arg is a LclVar we check if it is struct promoted and has the right number of fields,
//    and if they are at the appropriate offsets we will use the struct promoted fields
//    in the GT_FIELD_LIST nodes that we create.
//    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements
//    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
//    this also forces the struct to be stack allocated into the local frame.
//    For the GT_OBJ case we will clone the address expression and generate two (or more)
//    indirections.
//    Currently the implementation handles ARM64/ARM and will NYI for other architectures.
//
GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
{
    assert(arg->TypeGet() == TYP_STRUCT);

#ifndef _TARGET_ARMARCH_
    NYI("fgMorphMultiregStructArg requires implementation for this target");
#endif

#if FEATURE_MULTIREG_ARGS
    // Examine 'arg' and setup argValue objClass and structSize
    //
    CORINFO_CLASS_HANDLE objClass   = NO_CLASS_HANDLE;
    GenTreePtr           argValue   = arg; // normally argValue will be arg, but see right below
    unsigned             structSize = 0;

    if (arg->OperGet() == GT_OBJ)
    {
        GenTreeObj* argObj = arg->AsObj();
        objClass           = argObj->gtClass;
        structSize         = info.compCompHnd->getClassSize(objClass);

        // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
        //
        if (argObj->gtOp1->OperGet() == GT_ADDR)
        {
            argValue = argObj->gtOp1->gtOp.gtOp1;
        }
    }
    else if (arg->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
        unsigned             varNum  = varNode->gtLclNum;
        assert(varNum < lvaCount);
        LclVarDsc* varDsc = &lvaTable[varNum];

        objClass   = lvaGetStruct(varNum);
        structSize = varDsc->lvExactSize;
    }
    noway_assert(objClass != nullptr);

    var_types hfaType                 = TYP_UNDEF;
    var_types elemType                = TYP_UNDEF;
    unsigned  elemCount               = 0;
    unsigned  elemSize                = 0;
    var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0

    hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
    if (varTypeIsFloating(hfaType))
    {
        elemType  = hfaType;
        elemSize  = genTypeSize(elemType);
        elemCount = structSize / elemSize;
        assert(elemSize * elemCount == structSize);
        for (unsigned inx = 0; inx < elemCount; inx++)
        {
            type[inx] = elemType;
        }
    }
    else
    {
#ifdef _TARGET_ARM64_
        assert(structSize <= 2 * TARGET_POINTER_SIZE);
#elif defined(_TARGET_ARM_)
        assert(structSize <= 4 * TARGET_POINTER_SIZE);
#endif

#ifdef _TARGET_ARM64_
        BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
        info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
        elemCount = 2;
        type[0]   = getJitGCType(gcPtrs[0]);
        type[1]   = getJitGCType(gcPtrs[1]);
#elif defined(_TARGET_ARM_)
        BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
        elemCount      = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
        info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
        for (unsigned inx = 0; inx < elemCount; inx++)
        {
            type[inx] = getJitGCType(gcPtrs[inx]);
        }
#endif // _TARGET_ARM_

        if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
        {
            elemSize = TARGET_POINTER_SIZE;
            // We can safely widen this to aligned bytes since we are loading from
            // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
            // lives in the stack frame or will be a promoted field.
            //
            structSize = elemCount * TARGET_POINTER_SIZE;
        }
        else // we must have a GT_OBJ
        {
            assert(argValue->OperGet() == GT_OBJ);

            // We need to load the struct from an arbitrary address
            // and we can't read past the end of the structSize
            // We adjust the last load type here
            //
            unsigned remainingBytes = structSize % TARGET_POINTER_SIZE;
            unsigned lastElem       = elemCount - 1;
            if (remainingBytes != 0)
            {
                switch (remainingBytes)
                {
                    case 1:
                        type[lastElem] = TYP_BYTE;
                        break;
                    case 2:
                        type[lastElem] = TYP_SHORT;
                        break;
#ifdef _TARGET_ARM64_
                    case 4:
                        type[lastElem] = TYP_INT;
                        break;
#endif // _TARGET_ARM64_
                    default:
                        noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
                        break;
                }
            }
        }
    }
    // We should still have a TYP_STRUCT
    assert(argValue->TypeGet() == TYP_STRUCT);

    GenTreeFieldList* newArg = nullptr;

    // Are we passing a struct LclVar?
    //
    if (argValue->OperGet() == GT_LCL_VAR)
    {
        GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
        unsigned             varNum  = varNode->gtLclNum;
        assert(varNum < lvaCount);
        LclVarDsc* varDsc = &lvaTable[varNum];

        // At this point any TYP_STRUCT LclVar must be an aligned struct
        // or an HFA struct, both which are passed by value.
        //
        assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());

        varDsc->lvIsMultiRegArg = true;

#ifdef DEBUG
        if (verbose)
        {
            JITDUMP("Multireg struct argument V%02u : ");
            fgEntryPtr->Dump();
        }
#endif // DEBUG

        // This local variable must match the layout of the 'objClass' type exactly
        if (varDsc->lvIsHfa())
        {
            // We have a HFA struct
            noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
            noway_assert(elemSize == genTypeSize(elemType));
            noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
            noway_assert(elemSize * elemCount == varDsc->lvExactSize);

            for (unsigned inx = 0; (inx < elemCount); inx++)
            {
                noway_assert(type[inx] == elemType);
            }
        }
        else
        {
#ifdef _TARGET_ARM64_
            // We must have a 16-byte struct (non-HFA)
            noway_assert(elemCount == 2);
#elif defined(_TARGET_ARM_)
            noway_assert(elemCount <= 4);
#endif

            for (unsigned inx = 0; inx < elemCount; inx++)
            {
                CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];

                // We setup the type[inx] value above using the GC info from 'objClass'
                // This GT_LCL_VAR must have the same GC layout info
                //
                if (currentGcLayoutType != TYPE_GC_NONE)
                {
                    noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
                }
                else
                {
                    // We may have use a small type when we setup the type[inx] values above
                    // We can safely widen this to TYP_I_IMPL
                    type[inx] = TYP_I_IMPL;
                }
            }
        }

#ifdef _TARGET_ARM64_
        // Is this LclVar a promoted struct with exactly 2 fields?
        // TODO-ARM64-CQ: Support struct promoted HFA types here
        if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
        {
            // See if we have two promoted fields that start at offset 0 and 8?
            unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
            unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);

            // Did we find the promoted fields at the necessary offsets?
            if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
            {
                LclVarDsc* loVarDsc = &lvaTable[loVarNum];
                LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];

                var_types loType = loVarDsc->lvType;
                var_types hiType = hiVarDsc->lvType;

                if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
                {
                    // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
                    // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
                    //
                    JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
                            varNum);
                    //
                    // we call lvaSetVarDoNotEnregister and do the proper transformation below.
                    //
                }
                else
                {
                    // We can use the struct promoted field as the two arguments

                    GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
                    GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);

                    // Create a new tree for 'arg'
                    //    replace the existing LDOBJ(ADDR(LCLVAR))
                    //    with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
                    //
                    newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
                    (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
                }
            }
        }
        else
        {
            //
            // We will create a list of GT_LCL_FLDs nodes to pass this struct
            //
            lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
        }
#elif defined(_TARGET_ARM_)
        // Is this LclVar a promoted struct with exactly same size?
        if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
        {
            // See if we have promoted fields?
            unsigned varNums[4];
            bool     hasBadVarNum = false;
            for (unsigned inx = 0; inx < elemCount; inx++)
            {
                varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
                if (varNums[inx] == BAD_VAR_NUM)
                {
                    hasBadVarNum = true;
                    break;
                }
            }

            // Did we find the promoted fields at the necessary offsets?
            if (!hasBadVarNum)
            {
                LclVarDsc* varDscs[4];
                var_types  varType[4];
                bool       varIsFloat = false;

                for (unsigned inx = 0; inx < elemCount; inx++)
                {
                    varDscs[inx] = &lvaTable[varNums[inx]];
                    varType[inx] = varDscs[inx]->lvType;
                    if (varTypeIsFloating(varType[inx]))
                    {
                        // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the
                        // integer
                        // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
                        //
                        JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
                                varNum);
                        //
                        // we call lvaSetVarDoNotEnregister and do the proper transformation below.
                        //
                        varIsFloat = true;
                        break;
                    }
                }

                if (!varIsFloat)
                {
                    unsigned          offset    = 0;
                    GenTreeFieldList* listEntry = nullptr;
                    // We can use the struct promoted field as arguments
                    for (unsigned inx = 0; inx < elemCount; inx++)
                    {
                        GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
                        // Create a new tree for 'arg'
                        //    replace the existing LDOBJ(ADDR(LCLVAR))
                        listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
                        if (newArg == nullptr)
                        {
                            newArg = listEntry;
                        }
                        offset += TARGET_POINTER_SIZE;
                    }
                }
            }
        }
        else
        {
            //
            // We will create a list of GT_LCL_FLDs nodes to pass this struct
            //
            lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
        }
#endif // _TARGET_ARM_
    }

    // If we didn't set newarg to a new List Node tree
    //
    if (newArg == nullptr)
    {
        if (fgEntryPtr->regNum == REG_STK)
        {
            // We leave this stack passed argument alone
            return arg;
        }

        // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted )
        // A GT_LCL_FLD could also contain a 16-byte struct or HFA struct inside it?
        //
        if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
        {
            GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
            unsigned             varNum  = varNode->gtLclNum;
            assert(varNum < lvaCount);
            LclVarDsc* varDsc = &lvaTable[varNum];

            unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
            unsigned lastOffset = baseOffset + (elemCount * elemSize);

            // The allocated size of our LocalVar must be at least as big as lastOffset
            assert(varDsc->lvSize() >= lastOffset);

            if (varDsc->lvStructGcCount > 0)
            {
                // alignment of the baseOffset is required
                noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
                noway_assert(elemSize == TARGET_POINTER_SIZE);
                unsigned    baseIndex = baseOffset / TARGET_POINTER_SIZE;
                const BYTE* gcPtrs    = varDsc->lvGcLayout; // Get the GC layout for the local variable
                for (unsigned inx = 0; (inx < elemCount); inx++)
                {
                    // The GC information must match what we setup using 'objClass'
                    noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
                }
            }
            else //  this varDsc contains no GC pointers
            {
                for (unsigned inx = 0; inx < elemCount; inx++)
                {
                    // The GC information must match what we setup using 'objClass'
                    noway_assert(!varTypeIsGC(type[inx]));
                }
            }

            //
            // We create a list of GT_LCL_FLDs nodes to pass this struct
            //
            lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));

            // Create a new tree for 'arg'
            //    replace the existing LDOBJ(ADDR(LCLVAR))
            //    with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
            //
            unsigned          offset    = baseOffset;
            GenTreeFieldList* listEntry = nullptr;
            for (unsigned inx = 0; inx < elemCount; inx++)
            {
                elemSize              = genTypeSize(type[inx]);
                GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
                listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
                if (newArg == nullptr)
                {
                    newArg = listEntry;
                }
                offset += elemSize;
            }
        }
        // Are we passing a GT_OBJ struct?
        //
        else if (argValue->OperGet() == GT_OBJ)
        {
            GenTreeObj* argObj   = argValue->AsObj();
            GenTreePtr  baseAddr = argObj->gtOp1;
            var_types   addrType = baseAddr->TypeGet();

            // Create a new tree for 'arg'
            //    replace the existing LDOBJ(EXPR)
            //    with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
            //

            unsigned          offset    = 0;
            GenTreeFieldList* listEntry = nullptr;
            for (unsigned inx = 0; inx < elemCount; inx++)
            {
                elemSize           = genTypeSize(type[inx]);
                GenTreePtr curAddr = baseAddr;
                if (offset != 0)
                {
                    GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
                    noway_assert(baseAddrDup != nullptr);
                    curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
                }
                else
                {
                    curAddr = baseAddr;
                }
                GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);

                // For safety all GT_IND should have at least GT_GLOB_REF set.
                curItem->gtFlags |= GTF_GLOB_REF;
                if (fgAddrCouldBeNull(curItem))
                {
                    // This indirection can cause a GPF if the address could be null.
                    curItem->gtFlags |= GTF_EXCEPT;
                }

                listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
                if (newArg == nullptr)
                {
                    newArg = listEntry;
                }
                offset += elemSize;
            }
        }
    }

#ifdef DEBUG
    // If we reach here we should have set newArg to something
    if (newArg == nullptr)
    {
        gtDispTree(argValue);
        assert(!"Missing case in fgMorphMultiregStructArg");
    }

    if (verbose)
    {
        printf("fgMorphMultiregStructArg created tree:\n");
        gtDispTree(newArg);
    }
#endif

    arg = newArg; // consider calling fgMorphTree(newArg);

#endif // FEATURE_MULTIREG_ARGS

    return arg;
}

//------------------------------------------------------------------------
// fgMakeOutgoingStructArgCopy: make a copy of a struct argument, if necessary,
//    to pass to a callee.
//
// Arguments:
//    call          - the call whose argument is being processed
//    args          - the argument list node; args->Current() is the struct argument
//    argIndex      - index of the argument, forwarded to fgArgInfo::EvalToTmp
//    copyBlkClass  - class handle describing the struct that is copied
//    structDescPtr - (FEATURE_UNIX_AMD64_STRUCT_PASSING only) the struct's register passing
//                    descriptor; only read when FEATURE_FIXED_OUT_ARGS is not set
//
// Return Value:
//    None. The result is communicated by updating the argument in place:
//    args->gtOp.gtOp1 is replaced either with the local itself (when no copy is
//    needed) or with the tree that performs the copy into a temp, and the call's
//    argument table is updated to match.
//
void Compiler::fgMakeOutgoingStructArgCopy(
    GenTreeCall*         call,
    GenTree*             args,
    unsigned             argIndex,
    CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
        const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
{
    GenTree* argx = args->Current();
    noway_assert(argx->gtOper != GT_MKREFANY);
    // See if we need to insert a copy at all
    // Case 1: don't need a copy if it is the last use of a local.  We can't determine that all of the time
    // but if there is only one use and no loops, the use must be last.
    GenTreeLclVarCommon* lcl = nullptr;
    if (argx->OperIsLocal())
    {
        lcl = argx->AsLclVarCommon();
    }
    else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
    {
        // The argument is an OBJ whose address operand is itself a local.
        lcl = argx->AsObj()->Addr()->AsLclVarCommon();
    }
    if (lcl != nullptr)
    {
        unsigned varNum = lcl->GetLclNum();
        if (lvaIsImplicitByRefLocal(varNum))
        {
            LclVarDsc* varDsc = &lvaTable[varNum];
            // JIT_TailCall helper has an implicit assumption that all tail call arguments live
            // on the caller's frame. If an argument lives on the caller caller's frame, it may get
            // overwritten if that frame is reused for the tail call. Therefore, we should always copy
            // struct parameters if they are passed as arguments to a tail call.
            if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
            {
                // Pass the local directly: drop its single reference and point both the
                // argument list and the argument table entry at the local node.
                varDsc->lvRefCnt    = 0;
                args->gtOp.gtOp1    = lcl;
                fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
                fp->node            = lcl;

                JITDUMP("did not have to make outgoing copy for V%2d", varNum);
                return;
            }
        }
    }

    if (fgOutgoingArgTemps == nullptr)
    {
        fgOutgoingArgTemps = hashBv::Create(this);
    }

    unsigned tmp   = 0;
    bool     found = false;

    // Attempt to find a local we have already used for an outgoing struct and reuse it.
    // We do not reuse within a statement.
    if (!opts.MinOpts())
    {
        indexType lclNum;
        FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
        {
            LclVarDsc* varDsc = &lvaTable[lclNum];
            if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
                !fgCurrentlyInUseArgTemps->testBit(lclNum))
            {
                tmp   = (unsigned)lclNum;
                found = true;
                JITDUMP("reusing outgoing struct arg");
                break;
            }
        }
        NEXT_HBV_BIT_SET;
    }

    // Create the CopyBlk tree and insert it.
    if (!found)
    {
        // Get a new temp
        // Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk.
        tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
        lvaSetStruct(tmp, copyBlkClass, false);
        fgOutgoingArgTemps->setBit(tmp);
    }

    // Mark the temp as in use so that it is not picked again by the reuse search above.
    fgCurrentlyInUseArgTemps->setBit(tmp);

    // TYP_SIMD structs should not be enregistered, since ABI requires it to be
    // allocated on stack and address of it needs to be passed.
    if (lclVarIsSIMDType(tmp))
    {
        lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
    }

    // Create a reference to the temp
    GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
    dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.

    // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
    // to ref counting of the lclVars.
    lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);

    if (argx->gtOper == GT_OBJ)
    {
        // The mask applied is (~GTF_ALL_EFFECT | <effect flags of the address>): every effect
        // flag on the OBJ that is not also set on its address operand is cleared, and all
        // non-effect flags are left untouched.
        argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
    }
    else
    {
        argx->gtFlags |= GTF_DONT_CSE;
    }

    // Copy the valuetype to the temp
    unsigned   size    = info.compCompHnd->getClassSize(copyBlkClass);
    GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
    copyBlk            = fgMorphCopyBlock(copyBlk);

#if FEATURE_FIXED_OUT_ARGS

    // Do the copy early, and evalute the temp later (see EvalArgsToTemps)
    // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode
    GenTreePtr arg = copyBlk;

#else // FEATURE_FIXED_OUT_ARGS

    // Structs are always on the stack, and thus never need temps
    // so we have to put the copy and temp all into one expression
    GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));

    // Change the expression to "(tmp=val),tmp"
    arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);

#endif // FEATURE_FIXED_OUT_ARGS

    // Install the new argument tree and record in the argument table that this
    // argument is evaluated into 'tmp'.
    args->gtOp.gtOp1 = arg;
    call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);

    return;
}

#ifdef _TARGET_ARM_
// See declaration for specification comment.
void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
                                                   unsigned   firstArgRegNum,
                                                   regMaskTP* pArgSkippedRegMask)
{
    assert(varDsc->lvPromoted);

    // These calculations cannot be done without breaking abstraction: they assume that
    // the integer argument registers are consecutive ints, which is the case on ARM.

    // Start with the first promoted field and compute the register slot (relative to the
    // first argument register) that holds its last byte.
    const unsigned firstFieldLclNum = varDsc->lvFieldLclStart;
    LclVarDsc*     firstField       = &lvaTable[firstFieldLclNum];
    unsigned       prevEndSlot      = (firstField->lvFldOffset + firstField->lvExactSize - 1) / TARGET_POINTER_SIZE;

    // Visit the remaining fields in order, comparing the slot in which each one starts with
    // the slot in which the previous one ended.
    // (We assume here the invariant that the fields are sorted in offset order.)
    for (unsigned fieldIdx = 1; fieldIdx < varDsc->lvFieldCnt; fieldIdx++)
    {
        LclVarDsc*     curField     = &lvaTable[firstFieldLclNum + fieldIdx];
        const unsigned curStartSlot = curField->lvFldOffset / TARGET_POINTER_SIZE;
        assert(curStartSlot >= prevEndSlot); // Assuming sorted fields.

        // Every slot strictly between the end of the previous field and the start of this
        // one is a skipped register; add each of them to the mask.
        unsigned skippedSlot = prevEndSlot + 1;
        while (skippedSlot < curStartSlot)
        {
            const unsigned skippedRegNum = firstArgRegNum + skippedSlot;

            // If the register number would not be an arg reg, we're done.
            if (skippedRegNum >= MAX_REG_ARG)
            {
                return;
            }

            *pArgSkippedRegMask |= genRegMask(regNumber(skippedRegNum));
            skippedSlot++;
        }

        prevEndSlot = (curField->lvFldOffset + curField->lvExactSize - 1) / TARGET_POINTER_SIZE;
    }
}

#endif // _TARGET_ARM_

//****************************************************************************
//  fgFixupStructReturn:
//    The companion to impFixupCallStructReturn.  Once the importer has finished,
//    replace the call's gtType with the native return type computed for it.
//    Requires that callNode currently has a struct type.
//
void Compiler::fgFixupStructReturn(GenTreePtr callNode)
{
    assert(varTypeIsStruct(callNode));

    GenTreeCall* call = callNode->AsCall();

    // Sample the return-buffer state before the node is re-typed below.
    bool hasRetBuf = call->HasRetBufArg();

    // Work out how this struct-returning call actually returns its value.
    //
    CORINFO_CLASS_HANDLE        clsHnd = call->gtRetClsHnd;
    Compiler::structPassingKind retKind;
    var_types                   retType;

    if (!call->IsHelperCall())
    {
        retType = getReturnTypeForStruct(clsHnd, &retKind);
    }
    else
    {
        // A few helper calls claim to return TYP_STRUCT but rely on this method to
        // re-type them to TYP_REF (which is what call->gtReturnType holds):
        //
        //    CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
        //    CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
        //    CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
        //
        assert(!hasRetBuf);
        assert(clsHnd == NO_CLASS_HANDLE);

        // We are past the importer now, so the node can be re-typed.
        retKind = SPK_PrimitiveType;
        retType = (var_types)call->gtReturnType;
    }

    if (retKind != SPK_ByReference)
    {
        assert(retType != TYP_UNKNOWN);

        // A primitive type is widened if necessary; TYP_STRUCT is kept as is.
        call->gtType = (retType == TYP_STRUCT) ? retType : genActualType(retType);
    }
    else
    {
        // Returned through a return buffer: the node's type is not changed here.
        assert(retType == TYP_UNKNOWN);
        assert(hasRetBuf);
    }

#if FEATURE_MULTIREG_RET
    // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
    assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || hasRetBuf);
#else // !FEATURE_MULTIREG_RET
    // No more struct returns
    assert(call->TypeGet() != TYP_STRUCT);
#endif

#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    // If it was a struct return, it has been transformed into a call
    // with a return buffer (that returns TYP_VOID) or into a return
    // of a primitive/enregisterable type
    assert(!hasRetBuf || (call->TypeGet() == TYP_VOID));
#endif
}

/*****************************************************************************
 *
 *  A little helper used to rearrange nested commutative operations. The
 *  effect is that nested associative, commutative operations are transformed
 *  into a 'left-deep' tree, i.e. into something like this:
 *
 *      (((a op b) op c) op d) op...
 *
 *  'tree' must be a commutative binary operator (ADD, XOR, OR, AND or MUL)
 *  whose second operand uses the same operator. The tree is rewritten in
 *  place and nothing is returned. The rewrite stops (keeping whatever has
 *  been rearranged so far) as soon as one of the bail-out conditions checked
 *  inside the loop below is met.
 */

#if REARRANGE_ADDS

void Compiler::fgMoveOpsLeft(GenTreePtr tree)
{
    GenTreePtr op1;
    GenTreePtr op2;
    genTreeOps oper;

    // Each iteration rotates one level: "tree = op1 op (ad1 op ad2)" becomes
    // "tree = (op1 op ad1) op ad2", reusing the op2 node as the new left operand.
    do
    {
        op1  = tree->gtOp.gtOp1;
        op2  = tree->gtOp.gtOp2;
        oper = tree->OperGet();

        noway_assert(GenTree::OperIsCommutative(oper));
        noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
        noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
        noway_assert(oper == op2->gtOper);

        // Commutativity doesn't hold if overflow checks are needed

        if (tree->gtOverflowEx() || op2->gtOverflowEx())
        {
            return;
        }

        if (gtIsActiveCSE_Candidate(op2))
        {
            // If we have marked op2 as a CSE candidate,
            // we can't perform a commutative reordering
            // because any value numbers that we computed for op2
            // will be incorrect after performing a commutative reordering
            //
            return;
        }

        // Leave a multiply alone when the nested multiply carries GTF_MUL_64RSLT.
        if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
        {
            return;
        }

        // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
        if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
        {
            return;
        }

        if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
        {
            // We could deal with this, but we were always broken and just hit the assert
            // below regarding flags, which means it's not frequent, so will just bail out.
            // See #195514
            return;
        }

        noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());

        // The two operands of the nested operation: op2 is "(ad1 op ad2)".
        GenTreePtr ad1 = op2->gtOp.gtOp1;
        GenTreePtr ad2 = op2->gtOp.gtOp2;

        // Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT
        // We can not reorder such GT_OR trees
        //
        if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
        {
            // Leaves the loop; the only thing after the loop is the return.
            break;
        }

        /* Change "(x op (y op z))" to "(x op y) op z" */
        /* ie.    "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */

        // The existing op2 node is reused as the new left operand rather than allocating a new node.
        GenTreePtr new_op1 = op2;

        new_op1->gtOp.gtOp1 = op1;
        new_op1->gtOp.gtOp2 = ad1;

        /* Change the flags. */

        // Make sure we aren't throwing away any flags
        noway_assert((new_op1->gtFlags &
                      ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
                        GTF_REVERSE_OPS |             // The reverse ops flag also can be set, it will be re-calculated
                        GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);

        // Keep only the node-mask and GTF_DONT_CSE bits of the reused node and take the
        // effect flags from its two new operands.
        new_op1->gtFlags =
            (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
            (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);

        /* Retype new_op1 if it has not/become a GC ptr. */

        if (varTypeIsGC(op1->TypeGet()))
        {
            noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
                          oper == GT_ADD) || // byref(ref + (int+int))
                         (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
                          oper == GT_OR)); // int(gcref | int(gcref|intval))

            new_op1->gtType = tree->gtType;
        }
        else if (varTypeIsGC(ad2->TypeGet()))
        {
            // Neither ad1 nor op1 are GC. So new_op1 isn't either
            noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
            new_op1->gtType = TYP_I_IMPL;
        }

        // If new_op1 is a new expression. Assign it a new unique value number.
        // vnStore is null before the ValueNumber phase has run
        if (vnStore != nullptr)
        {
            // We can only keep the old value number on new_op1 if both op1 and ad2
            // have the same non-NoVN value numbers. Since op is commutative, comparing
            // only ad2 and op1 is enough.
            if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
                (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
                (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
            {
                new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
            }
        }

        // Hook the rotated subtree and the remaining operand back into 'tree'.
        tree->gtOp.gtOp1 = new_op1;
        tree->gtOp.gtOp2 = ad2;

        /* If 'new_op1' is now the same nested op, process it recursively */

        if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
        {
            fgMoveOpsLeft(new_op1);
        }

        /* If   'ad2'   is now the same nested op, process it
         * Instead of recursion, we set up op1 and op2 for the next loop.
         */

        // op2 feeds the loop condition below; both are re-read from 'tree' at the top of the loop.
        op1 = new_op1;
        op2 = ad2;
    } while ((op2->gtOper == oper) && !op2->gtOverflowEx());

    return;
}

#endif

/*****************************************************************************
 *
 *  Set up the range-check failure handling for 'tree'.
 *
 *  For a bounds check node this either just records the current stack depth
 *  on the node (when 'delay' is set or when compiling for inlining) or
 *  finds/creates the "range-fail" block via fgRngChkTarget and attaches a
 *  reference to it to the node. Nothing beyond the MinOpts stack depth
 *  initialization is done when compiling debuggable code.
 */

void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
{
    GenTreeBoundsChk* boundsNode = nullptr;
    SpecialCodeKind   throwKind  = SCK_RNGCHK_FAIL;

    // Is this one of the bounds check opers?
    bool isBoundsCheckOper = (tree->gtOper == GT_ARR_BOUNDS_CHECK);
#ifdef FEATURE_SIMD
    isBoundsCheckOper = isBoundsCheckOper || (tree->gtOper == GT_SIMD_CHK);
#endif // FEATURE_SIMD

    if (!isBoundsCheckOper)
    {
        // The only other nodes expected here are the array element/index opers.
        noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
    }
    else
    {
        boundsNode = tree->AsBoundsChk();
        throwKind  = boundsNode->gtThrowKind;
    }

#ifdef _TARGET_X86_
    unsigned callStkDepth = fgPtrArgCntCur;
#else
    // only x86 pushes args
    const unsigned callStkDepth = 0;
#endif

    if (opts.MinOpts())
    {
        // Never delay under MinOpts.
        delay = false;

        // we need to initialize this field
        if (fgGlobalMorph && (boundsNode != nullptr))
        {
            boundsNode->gtStkDepth = callStkDepth;
        }
    }

    // Nothing more to do for debuggable code.
    if (opts.compDbgCode)
    {
        return;
    }

    if (delay || compIsForInlining())
    {
        // We delay this until after loop-oriented range check analysis.
        // For now we merely store the current stack level in the tree node.
        if (boundsNode != nullptr)
        {
            noway_assert(!boundsNode->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
            boundsNode->gtStkDepth = callStkDepth;
        }
        return;
    }

    /* Create/find the appropriate "range-fail" label */

    // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
    noway_assert((boundsNode != nullptr) || fgGlobalMorph);

    // Prefer the depth already recorded on the bounds check node, if there is one.
    unsigned stkDepth = callStkDepth;
    if (boundsNode != nullptr)
    {
        stkDepth = boundsNode->gtStkDepth;
    }

    BasicBlock* failBlock = fgRngChkTarget(compCurBB, stkDepth, throwKind);

    /* Add the label to the indirection node */

    if (boundsNode != nullptr)
    {
        boundsNode->gtIndRngFailBB = gtNewCodeRef(failBlock);
    }
}

/*****************************************************************************
 *
 *  Expand a GT_INDEX node and fully morph the child operands
 *
 *  The original GT_INDEX node is bashed into the GT_IND node that accesses
 *  the array element.  We expand the GT_INDEX node into a larger tree that
 *  evaluates the array base and index.  The simplest expansion is a GT_COMMA
 *  with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
 *  For complex array or index expressions one or more GT_COMMA assignments
 *  are inserted so that we only evaluate the array or index expressions once.
 *
 *  The fully expanded tree is then morphed.  This causes gtFoldExpr to
 *  perform local constant prop and reorder the constants in the tree and
 *  fold them.
 *
 *  We then parse the resulting array element expression in order to locate
 *  and label the constants and variables that occur in the tree.
 */

// Complexity limits for fgMorphArrayIndex (below).  MAX_ARR_COMPLEXITY is the bound handed to
// gtComplexityExceeds when deciding whether an operand of the GT_INDEX can simply be cloned
// or has to be evaluated once into a temp.
const int MAX_ARR_COMPLEXITY   = 4;
// NOTE(review): presumably the matching limit for the index operand of the GT_INDEX --
// confirm that it is the one applied to the index expression.
const int MAX_INDEX_COMPLEXITY = 4;

// fgMorphArrayIndex: expand a GT_INDEX node into an explicit array element access.
//
// Arguments:
//    tree - the GT_INDEX node.  It is rewritten in place into the GT_IND that reads
//           the element at "arrRef + index * elemSize + elemOffs".
//
// Return Value:
//    The root of the expanded tree: the GT_IND itself, or a chain of GT_COMMA nodes that
//    prepends (from the outside in) the array-ref temp assignment, the index temp
//    assignment and the bounds check to the GT_IND.  The expanded tree is passed through
//    fgMorphTree, but the value returned by that call is not used; the root built here is
//    what gets returned.
//
// Notes:
//    When range checking is requested, both operands are needed twice (once by the bounds
//    check and once by the address computation).  Each operand is therefore either cloned
//    or, when cloning would be unsafe or too expensive, evaluated once into a fresh temp.
//
GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
{
    noway_assert(tree->gtOper == GT_INDEX);
    GenTreeIndex* asIndex = tree->AsIndex();

    var_types            elemTyp        = tree->TypeGet();
    unsigned             elemSize       = tree->gtIndex.gtIndElemSize;
    CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;

    noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);

#ifdef FEATURE_SIMD
    if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
    {
        // If this is a SIMD type, this is the point at which we lose the type information,
        // so we need to set the correct type on the GT_IND.
        // (We don't care about the base type here, so we only check, but don't retain, the return value).
        unsigned simdElemSize = 0;
        if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
        {
            assert(simdElemSize == elemSize);
            elemTyp = getSIMDTypeForSize(elemSize);
            // This is the new type of the node.
            tree->gtType = elemTyp;
            // Now set elemStructType to null so that we don't confuse value numbering.
            elemStructType = nullptr;
        }
    }
#endif // FEATURE_SIMD

    GenTreePtr arrRef = asIndex->Arr();
    GenTreePtr index  = asIndex->Index();

    // Set up the array length's offset into lenOffs
    // and the first element's offset into elemOffs
    ssize_t lenOffs;
    ssize_t elemOffs;
    if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
    {
        lenOffs  = offsetof(CORINFO_String, stringLen);
        elemOffs = offsetof(CORINFO_String, chars);
        tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
    }
    else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
    {
        lenOffs  = offsetof(CORINFO_RefArray, length);
        elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
    }
    else // We have a standard array
    {
        lenOffs  = offsetof(CORINFO_Array, length);
        elemOffs = offsetof(CORINFO_Array, u1Elems);
    }

    // Capture the flags we need before the node is rewritten below.
    bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
    bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);

    GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
    GenTreePtr indexDefn  = nullptr; // non-NULL if we need to allocate a temp for the index expression
    GenTreePtr bndsChk    = nullptr;

    // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
    if (chkd)
    {
        GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
        GenTreePtr index2  = nullptr;

        // If the arrRef expression involves an assignment, a call or reads from global memory,
        // then we *must* allocate a temporary in which to "localize" those values,
        // to ensure that the same values are used in the bounds check and the actual
        // dereference.
        // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
        // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
        // complexity is not exposed. (Without that condition there are cases of local struct
        // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
        // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
        //
        if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
            gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
        {
            unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
            arrRefDefn            = gtNewTempAssign(arrRefTmpNum, arrRef);
            arrRef                = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
            arrRef2               = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
        }
        else
        {
            arrRef2 = gtCloneExpr(arrRef);
            noway_assert(arrRef2 != nullptr);
        }

        // If the index expression involves an assignment, a call or reads from global memory,
        // we *must* allocate a temporary in which to "localize" those values,
        // to ensure that the same values are used in the bounds check and the actual
        // dereference.
        // Also we allocate the temporary when the index is sufficiently complex/expensive,
        // or when the index is a not-yet-morphed GT_FIELD (same reasoning as for 'arrRef' above).
        //
        // The GT_FIELD test has to look at 'index' itself: by this point a GT_FIELD 'arrRef'
        // has already been replaced by a use of its temp, so testing 'arrRef' here could
        // never succeed.
        //
        if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
            gtComplexityExceeds(&index, MAX_INDEX_COMPLEXITY) || (index->OperGet() == GT_FIELD))
        {
            unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
            indexDefn            = gtNewTempAssign(indexTmpNum, index);
            index                = gtNewLclvNode(indexTmpNum, index->TypeGet());
            index2               = gtNewLclvNode(indexTmpNum, index->TypeGet());
        }
        else
        {
            index2 = gtCloneExpr(index);
            noway_assert(index2 != nullptr);
        }

        // Next introduce a GT_ARR_BOUNDS_CHECK node
        var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.

#ifdef _TARGET_64BIT_
        // The CLI Spec allows an array to be indexed by either an int32 or a native int.  In the case
        // of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
        // the comparison will have to be widened to 64 bits.
        if (index->TypeGet() == TYP_I_IMPL)
        {
            bndsChkType = TYP_I_IMPL;
        }
#endif // _TARGET_64BIT_

        GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);

        if (bndsChkType != TYP_INT)
        {
            // The length is read as TYP_INT; widen it to match the type of the index.
            arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
        }

        GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
            GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);

        bndsChk = arrBndsChk;

        // Make sure to increment ref-counts if already ref-counted.
        if (lvaLocalVarRefCounted)
        {
            lvaRecursiveIncRefCounts(index);
            lvaRecursiveIncRefCounts(arrRef);
        }

        // Now we'll switch to using the second copies for arrRef and index
        // to compute the address expression

        arrRef = arrRef2;
        index  = index2;
    }

    // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"

    GenTreePtr addr;

#ifdef _TARGET_64BIT_
    // Widen 'index' on 64-bit targets
    if (index->TypeGet() != TYP_I_IMPL)
    {
        if (index->OperGet() == GT_CNS_INT)
        {
            // A constant can simply be retyped; no cast node is needed.
            index->gtType = TYP_I_IMPL;
        }
        else
        {
            index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
        }
    }
#endif // _TARGET_64BIT_

    /* Scale the index value if necessary */
    if (elemSize > 1)
    {
        GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);

        // Fix 392756 WP7 Crossgen
        //
        // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
        // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
        // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
        //
        size->gtFlags |= GTF_DONT_CSE;

        /* Multiply by the array element size */
        addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
    }
    else
    {
        addr = index;
    }

    /* Add the object ref to the element's offset */

    addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);

    /* Add the first element's offset */

    GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);

    addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);

#if SMALL_TREE_NODES
    assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
#endif

    // Change the original GT_INDEX node into a GT_IND node
    tree->SetOper(GT_IND);

    // If the element type (now the type of the GT_IND) is floating-point, notify the compiler
    // we'll potentially use floating point registers at the time of codegen.
    if (varTypeIsFloating(tree->gtType))
    {
        this->compFloatingPointUsed = true;
    }

    // We've now consumed the GTF_INX_RNGCHK, and the node
    // is no longer a GT_INDEX node.
    tree->gtFlags &= ~GTF_INX_RNGCHK;

    tree->gtOp.gtOp1 = addr;

    // This is an array index expression.
    tree->gtFlags |= GTF_IND_ARR_INDEX;

    /* An indirection will cause a GPF if the address is null */
    tree->gtFlags |= GTF_EXCEPT;

    if (nCSE)
    {
        tree->gtFlags |= GTF_DONT_CSE;
    }

    // Store information about it.
    GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));

    // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.

    GenTreePtr indTree = tree;

    // Did we create a bndsChk tree?
    if (bndsChk)
    {
        // Use a GT_COMMA node to prepend the array bound check
        //
        tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);

        /* Mark the indirection node as needing a range check */
        fgSetRngChkTarget(bndsChk);
    }

    if (indexDefn != nullptr)
    {
        // Use a GT_COMMA node to prepend the index assignment
        //
        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
    }
    if (arrRefDefn != nullptr)
    {
        // Use a GT_COMMA node to prepend the arrRef assignment
        //
        tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
    }

    // Currently we morph the tree to perform some folding operations prior
    // to attaching fieldSeq info and labeling constant array index contributions
    //
    fgMorphTree(tree);

    // Ideally we just want to proceed to attaching fieldSeq info and labeling the
    // constant array index contributions, but the morphing operation may have changed
    // the 'tree' into something that now unconditionally throws an exception.
    //
    // In such case the gtEffectiveVal could be a new tree or its gtOper could be modified
    // or it could be left unchanged.  If it is unchanged then we should not return,
    // instead we should proceed to attaching fieldSeq info, etc...
    //
    GenTreePtr arrElem = tree->gtEffectiveVal();

    if (fgIsCommaThrow(tree))
    {
        if ((arrElem != indTree) ||         // A new tree node may have been created
            (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
        {
            return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
        }
    }

    assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));

    addr = arrElem->gtOp.gtOp1;

    assert(addr->TypeGet() == TYP_BYREF);

    // Walk the (possibly folded) address expression: peel off a trailing constant offset,
    // then label every index contribution on the way down to the array reference.
    GenTreePtr cnsOff = nullptr;
    if (addr->OperGet() == GT_ADD)
    {
        if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
        {
            cnsOff = addr->gtOp.gtOp2;
            addr   = addr->gtOp.gtOp1;
        }

        while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
        {
            assert(addr->TypeGet() == TYP_BYREF);
            GenTreePtr indexContrib = addr->gtOp.gtOp2;

            // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
            indexContrib->LabelIndex(this);

            addr = addr->gtOp.gtOp1;
        }
        assert(addr->TypeGet() == TYP_REF);
    }
    else if (addr->OperGet() == GT_CNS_INT)
    {
        cnsOff = addr;
    }

    FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);

    if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
    {
        // Assign it the [#FirstElem] field sequence
        //
        cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
    }
    else //  We have folded the first element's offset with the index expression
    {
        // Build the [#ConstantIndex, #FirstElem] field sequence
        //
        FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
        FieldSeqNode* fieldSeq          = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);

        if (cnsOff == nullptr) // It must have folded into a zero offset
        {
            // Record in the general zero-offset map.
            GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
        }
        else
        {
            cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
        }
    }

    return tree;
}

#ifdef _TARGET_X86_
/*****************************************************************************
 *
 *  Wrap fixed stack arguments for varargs functions to go through varargs
 *  cookie to access them, except for the cookie itself.
 *
 * Non-x86 platforms are allowed to access all arguments directly
 * so we don't need this code.
 *
 */
GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
{
    // In a varargs function the fixed stack arguments are reached through the
    // varargs base-of-stack-args local.  Register arguments, non-parameters and
    // the varargs handle itself are left alone: for those we return nullptr.

    LclVarDsc* argDsc = &lvaTable[lclNum];

    if (!argDsc->lvIsParam || argDsc->lvIsRegArg || (lclNum == lvaVarargsHandleArg))
    {
        return nullptr;
    }

    // Address of the argument: the base-of-stack-args local minus a constant built from
    // the local's stack offset, the callee register-argument area and 'lclOffs'.
    GenTreePtr argOffset =
        gtNewIconNode(argDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) + lclOffs);
    GenTreePtr argsBase = gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL);
    GenTreePtr argAddr  = gtNewOperNode(GT_SUB, TYP_I_IMPL, argsBase, argOffset);

    // Read the argument through that address: a block value for TYP_STRUCT,
    // a plain indirection of 'varType' otherwise.
    GenTreePtr argAccess = (varType == TYP_STRUCT) ? gtNewBlockVal(argAddr, argDsc->lvExactSize)
                                                   : gtNewOperNode(GT_IND, varType, argAddr);

    argAccess->gtFlags |= GTF_IND_TGTANYWHERE;

    // An address-exposed local has to be treated as a global reference.
    if (argDsc->lvAddrExposed)
    {
        argAccess->gtFlags |= GTF_GLOB_REF;
    }

    return fgMorphTree(argAccess);
}
#endif

/*****************************************************************************
 *
 *  Transform the given GT_LCL_VAR tree for code generation.
 */

GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree, bool forceRemorph)
{
    noway_assert(tree->gtOper == GT_LCL_VAR);

    unsigned   varNum  = tree->gtLclVarCommon.gtLclNum;
    LclVarDsc* lclDsc  = &lvaTable[varNum];
    var_types  varType = lvaGetRealType(varNum);

    // A use of an address-exposed local is a global reference.
    if (lclDsc->lvAddrExposed)
    {
        tree->gtFlags |= GTF_GLOB_REF;
    }

#ifdef _TARGET_X86_
    if (info.compIsVarArgs)
    {
        // Fixed stack arguments of a varargs method are rewritten to go through the varargs cookie.
        GenTreePtr viaCookie = fgMorphStackArgForVarArgs(varNum, varType, 0);
        if (viaCookie != nullptr)
        {
            // A block access that is not a definition is turned into an indirection.
            bool isDef = ((tree->gtFlags & GTF_VAR_DEF) != 0);
            if (viaCookie->OperIsBlk() && !isDef)
            {
                fgMorphBlkToInd(viaCookie->AsBlk(), viaCookie->gtType);
            }
            return viaCookie;
        }
    }
#endif // _TARGET_X86_

    // Outside of global morph there is nothing more to do unless a remorph is forced.
    if (!(fgGlobalMorph || forceRemorph))
    {
        return tree;
    }

    bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;

    noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr

    // Only a plain load of a small-typed, normalize-on-load local needs a cast.
    if (varAddr || !varTypeIsSmall(lclDsc->TypeGet()) || !lclDsc->lvNormalizeOnLoad())
    {
        return tree;
    }

#if LOCAL_ASSERTION_PROP
    // Local assertion prop may already know that the value fits in 'varType',
    // in which case the cast can be omitted.
    if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
    {
        return tree;
    }
#endif

    // Small-typed arguments and aliased locals are normalized on load; other
    // small-typed locals are normalized on store.  (Also under the debugger,
    // as the debugger could write to the variable.)  For a normalize-on-load
    // local, read it as an int and narrow it explicitly:
    //     var-short --> cast-short(var-int)

    tree->gtType = TYP_INT;
    fgMorphTreeDone(tree);

    GenTreePtr narrowed = gtNewCastNode(TYP_INT, tree, varType);
    fgMorphTreeDone(narrowed);

    return narrowed;
}

/*****************************************************************************
  Grab a temp for big offset morphing.
  This method will grab a new temp if no temp of this "type" has been created.
  Or it will return the same cached one if it has been created.
*/
unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
{
    // One temp is cached per 'type' in fgBigOffsetMorphingTemps; BAD_VAR_NUM marks an empty slot.
    unsigned lclNum = fgBigOffsetMorphingTemps[type];

    if (lclNum != BAD_VAR_NUM)
    {
        // Reusing a cached temp: it must have been given the type it is cached under.
        noway_assert(lvaTable[lclNum].TypeGet() == type);
    }
    else
    {
        // First request for this type: grab a new temp and remember it for later requests.
        lclNum                         = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
        fgBigOffsetMorphingTemps[type] = lclNum;
    }

    noway_assert(lclNum != BAD_VAR_NUM);
    return lclNum;
}

/*****************************************************************************
 *
 *  Transform the given GT_FIELD tree for code generation.
 */

GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
{
    assert(tree->gtOper == GT_FIELD);

    CORINFO_FIELD_HANDLE symHnd          = tree->gtField.gtFldHnd;
    unsigned             fldOffset       = tree->gtField.gtFldOffset;
    GenTreePtr           objRef          = tree->gtField.gtFldObj;
    bool                 fieldMayOverlap = false;
    bool                 objIsLocal      = false;

    if (fgGlobalMorph && (objRef != nullptr) && (objRef->gtOper == GT_ADDR))
    {
        // Make sure we've checked if 'objRef' is an address of an implicit-byref parameter.
        // If it is, fgMorphImplicitByRefArgs may change it do a different opcode, which the
        // simd field rewrites are sensitive to.
        fgMorphImplicitByRefArgs(objRef);
    }

    noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
                 ((tree->gtFlags & GTF_GLOB_REF) != 0));

    if (tree->gtField.gtFldMayOverlap)
    {
        fieldMayOverlap = true;
        // Reset the flag because we may reuse the node.
        tree->gtField.gtFldMayOverlap = false;
    }

#ifdef FEATURE_SIMD
    // if this field belongs to simd struct, translate it to simd instrinsic.
    if (mac == nullptr)
    {
        GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
        if (newTree != tree)
        {
            newTree = fgMorphSmpOp(newTree);
            return newTree;
        }
    }
    else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
    {
        GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
        if (lcl != nullptr)
        {
            lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
        }
    }
#endif

    /* Is this an instance data member? */

    if (objRef)
    {
        GenTreePtr addr;
        objIsLocal = objRef->IsLocal();

        if (tree->gtFlags & GTF_IND_TLS_REF)
        {
            NO_WAY("instance field can not be a TLS ref.");
        }

        /* We'll create the expression "*(objRef + mem_offs)" */

        noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);

        // An optimization for Contextful classes:
        // we unwrap the proxy when we have a 'this reference'
        if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
        {
            objRef = fgUnwrapProxy(objRef);
        }

        /*
            Now we have a tree like this:

                                  +--------------------+
                                  |      GT_FIELD      |   tree
                                  +----------+---------+
                                             |
                              +--------------+-------------+
                              |   tree->gtField.gtFldObj   |
                              +--------------+-------------+


            We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):

                                  +--------------------+
                                  |   GT_IND/GT_OBJ    |   tree
                                  +---------+----------+
                                            |
                                            |
                                  +---------+----------+
                                  |       GT_ADD       |   addr
                                  +---------+----------+
                                            |
                                          /   \
                                        /       \
                                      /           \
                         +-------------------+  +----------------------+
                         |       objRef      |  |     fldOffset        |
                         |                   |  | (when fldOffset !=0) |
                         +-------------------+  +----------------------+


            or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):


                                  +--------------------+
                                  |   GT_IND/GT_OBJ    |   tree
                                  +----------+---------+
                                             |
                                  +----------+---------+
                                  |       GT_COMMA     |  comma2
                                  +----------+---------+
                                             |
                                            / \
                                          /     \
                                        /         \
                                      /             \
                 +---------+----------+               +---------+----------+
           comma |      GT_COMMA      |               |  "+" (i.e. GT_ADD) |   addr
                 +---------+----------+               +---------+----------+
                           |                                     |
                         /   \                                  /  \
                       /       \                              /      \
                     /           \                          /          \
         +-----+-----+             +-----+-----+      +---------+   +-----------+
     asg |  GT_ASG   |         ind |   GT_IND  |      |  tmpLcl |   | fldOffset |
         +-----+-----+             +-----+-----+      +---------+   +-----------+
               |                         |
              / \                        |
            /     \                      |
          /         \                    |
   +-----+-----+   +-----+-----+   +-----------+
   |   tmpLcl  |   |   objRef  |   |   tmpLcl  |
   +-----------+   +-----------+   +-----------+


        */

        var_types objRefType = objRef->TypeGet();

        GenTreePtr comma = nullptr;

        bool addedExplicitNullCheck = false;

        // NULL mac means we encounter the GT_FIELD first.  This denotes a dereference of the field,
        // and thus is equivalent to a MACK_Ind with zero offset.
        MorphAddrContext defMAC(MACK_Ind);
        if (mac == nullptr)
        {
            mac = &defMAC;
        }

        // This flag is set to enable the "conservative" style of explicit null-check insertion.
        // This means that we insert an explicit null check whenever we create byref by adding a
        // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
        // dereferenced).  The alternative is "aggressive", which would not insert such checks (for
        // small offsets); in this plan, we would transfer some null-checking responsibility to
        // callee's of methods taking byref parameters.  They would have to add explicit null checks
        // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
        // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
        // large).  To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
        // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
        // This is left here to point out how to implement it.
        CLANG_FORMAT_COMMENT_ANCHOR;

#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1

        // If the objRef is a GT_ADDR node, it, itself, never requires null checking.  The expression
        // whose address is being taken is either a local or static variable, whose address is necessarily
        // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
        if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
                                          (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
                                           || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
#else
                                           || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
                                               (mac->m_totalOffset + fldOffset > 0))
#endif
                                               )))
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("Before explicit null check morphing:\n");
                gtDispTree(tree);
            }
#endif

            //
            // Create the "comma" subtree
            //
            GenTreePtr asg = nullptr;
            GenTreePtr nullchk;

            unsigned lclNum;

            if (objRef->gtOper != GT_LCL_VAR)
            {
                lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));

                // Create the "asg" node
                asg = gtNewTempAssign(lclNum, objRef);
            }
            else
            {
                lclNum = objRef->gtLclVarCommon.gtLclNum;
            }

            // Create the "nullchk" node.
            // Make it TYP_BYTE so we only deference it for 1 byte.
            GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
            nullchk           = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);

            nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections

            // An indirection will cause a GPF if the address is null.
            nullchk->gtFlags |= GTF_EXCEPT;

            compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
            optMethodFlags |= OMF_HAS_NULLCHECK;

            if (asg)
            {
                // Create the "comma" node.
                comma = gtNewOperNode(GT_COMMA,
                                      TYP_VOID, // We don't want to return anything from this "comma" node.
                                                // Set the type to TYP_VOID, so we can select "cmp" instruction
                                                // instead of "mov" instruction later on.
                                      asg, nullchk);
            }
            else
            {
                comma = nullchk;
            }

            addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.

            addedExplicitNullCheck = true;
        }
        else if (fldOffset == 0)
        {
            // Generate the "addr" node.
            addr = objRef;
            FieldSeqNode* fieldSeq =
                fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
            GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
        }
        else
        {
            addr = objRef;
        }

#ifdef FEATURE_READYTORUN_COMPILER
        if (tree->gtField.gtFieldLookup.addr != nullptr)
        {
            GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);

            if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
            {
                baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
            }

            addr =
                gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
        }
#endif
        if (fldOffset != 0)
        {
            // Generate the "addr" node.
            /* Add the member offset to the object's address */
            FieldSeqNode* fieldSeq =
                fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
            addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
                                 gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
        }

        // Now let's set the "tree" as a GT_IND tree.

        tree->SetOper(GT_IND);
        tree->gtOp.gtOp1 = addr;

        if (fgAddrCouldBeNull(addr))
        {
            // This indirection can cause a GPF if the address could be null.
            tree->gtFlags |= GTF_EXCEPT;
        }

        if (addedExplicitNullCheck)
        {
            //
            // Create "comma2" node and link it to "tree".
            //
            GenTreePtr comma2;
            comma2 = gtNewOperNode(GT_COMMA,
                                   addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
                                   comma, addr);
            tree->gtOp.gtOp1 = comma2;
        }

#ifdef DEBUG
        if (verbose)
        {
            if (addedExplicitNullCheck)
            {
                printf("After adding explicit null check:\n");
                gtDispTree(tree);
            }
        }
#endif
    }
    else /* This is a static data member */
    {
        if (tree->gtFlags & GTF_IND_TLS_REF)
        {
            // Thread Local Storage static field reference
            //
            // Field ref is a TLS 'Thread-Local-Storage' reference
            //
            // Build this tree:  IND(*) #
            //                    |
            //                   ADD(I_IMPL)
            //                   / \
            //                  /  CNS(fldOffset)
            //                 /
            //                /
            //               /
            //             IND(I_IMPL) == [Base of this DLL's TLS]
            //              |
            //             ADD(I_IMPL)
            //             / \
            //            /   CNS(IdValue*4) or MUL
            //           /                      / \
            //          IND(I_IMPL)            /  CNS(4)
            //           |                    /
            //          CNS(TLS_HDL,0x2C)    IND
            //                                |
            //                               CNS(pIdAddr)
            //
            // # Denotes the orginal node
            //
            void**   pIdAddr = nullptr;
            unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);

            //
            // If we can we access the TLS DLL index ID value directly
            // then pIdAddr will be NULL and
            //      IdValue will be the actual TLS DLL index ID
            //
            GenTreePtr dllRef = nullptr;
            if (pIdAddr == nullptr)
            {
                if (IdValue != 0)
                {
                    dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
                }
            }
            else
            {
                dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
                dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
                dllRef->gtFlags |= GTF_IND_INVARIANT;

                /* Multiply by 4 */

                dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
            }

#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides

            // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]

            GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);

            tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);

            if (dllRef != nullptr)
            {
                /* Add the dllRef */
                tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
            }

            /* indirect to have tlsRef point at the base of the DLLs Thread Local Storage */
            tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);

            if (fldOffset != 0)
            {
                FieldSeqNode* fieldSeq =
                    fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
                GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);

                /* Add the TLS static field offset to the address */

                tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
            }

            // Final indirect to get to actual value of TLS static field

            tree->SetOper(GT_IND);
            tree->gtOp.gtOp1 = tlsRef;

            noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
        }
        else
        {
            // Normal static field reference

            //
            // If we can access the static's address directly
            // then pFldAddr will be NULL and
            //      fldAddr will be the actual address of the static field
            //
            void** pFldAddr = nullptr;
            void*  fldAddr  = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);

            if (pFldAddr == nullptr)
            {
#ifdef _TARGET_64BIT_
                if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
                {
                    // The address is not directly addressable, so force it into a
                    // constant, so we handle it properly

                    GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
                    addr->gtType    = TYP_I_IMPL;
                    FieldSeqNode* fieldSeq =
                        fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
                    addr->gtIntCon.gtFieldSeq = fieldSeq;

                    tree->SetOper(GT_IND);
                    // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
                    // We must clear it when we transform the node.
                    // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
                    // that the logic above does its own checking to determine whether a nullcheck is needed.
                    tree->gtFlags &= ~GTF_IND_ARR_LEN;
                    tree->gtOp.gtOp1 = addr;

                    return fgMorphSmpOp(tree);
                }
                else
#endif // _TARGET_64BIT_
                {
                    // Only volatile or classinit could be set, and they map over
                    noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_FLD_INITCLASS | GTF_COMMON_MASK)) == 0);
                    static_assert_no_msg(GTF_FLD_VOLATILE == GTF_CLS_VAR_VOLATILE);
                    static_assert_no_msg(GTF_FLD_INITCLASS == GTF_CLS_VAR_INITCLASS);
                    tree->SetOper(GT_CLS_VAR);
                    tree->gtClsVar.gtClsVarHnd = symHnd;
                    FieldSeqNode* fieldSeq =
                        fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
                    tree->gtClsVar.gtFieldSeq = fieldSeq;
                }

                return tree;
            }
            else
            {
                GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);

                // There are two cases here, either the static is RVA based,
                // in which case the type of the FIELD node is not a GC type
                // and the handle to the RVA is a TYP_I_IMPL.  Or the FIELD node is
                // a GC type and the handle to it is a TYP_BYREF in the GC heap
                // because handles to statics now go into the large object heap

                var_types  handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
                GenTreePtr op1       = gtNewOperNode(GT_IND, handleTyp, addr);
                op1->gtFlags |= GTF_IND_INVARIANT;

                tree->SetOper(GT_IND);
                tree->gtOp.gtOp1 = op1;
            }
        }
    }
    noway_assert(tree->gtOper == GT_IND);
    // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
    // We must clear it when we transform the node.
    // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
    // that the logic above does its own checking to determine whether a nullcheck is needed.
    tree->gtFlags &= ~GTF_IND_ARR_LEN;

    GenTreePtr res = fgMorphSmpOp(tree);

    // If we have a struct type, this node would previously have been under a GT_ADDR,
    // and therefore would have been marked GTF_DONT_CSE.
    // TODO-1stClassStructs: revisit this.
    if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
    {
        res->gtFlags |= GTF_DONT_CSE;
    }

    if (fldOffset == 0 && res->OperGet() == GT_IND)
    {
        GenTreePtr addr = res->gtOp.gtOp1;
        // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
        FieldSeqNode* fieldSeq =
            fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
        fgAddFieldSeqForZeroOffset(addr, fieldSeq);
    }

    return res;
}

//------------------------------------------------------------------------------
// fgMorphCallInline: try to inline a candidate call, cleaning up on failure
//
// Arguments:
//    call         - the call to inline; must carry GTF_CALL_INLINE_CANDIDATE
//    inlineResult - tracks and reports the outcome of the attempt
//
// Notes:
//    The attempt itself is delegated to fgMorphCallInlineHelper.
//
//    On success, the callee's IR is inserted in place of the call, and
//    is marked with an InlineContext.
//
//    On failure, the transformations done in anticipation of a possible
//    inline are undone: a value-returning call is detached from the
//    statement being morphed, and the candidate flag on the call is cleared.

void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
{
    // Only inline candidates may be passed in.
    assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);

    fgMorphCallInlineHelper(call, inlineResult);

    // The helper must leave the result in a decided state.
    assert(inlineResult->IsDecided());

    if (!inlineResult->IsFailure())
    {
        // Nothing further to do here when the attempt did not fail.
        return;
    }

#ifdef DEBUG
    // Record a failing InlineContext describing this attempt before
    // any of the cleanup below runs.
    m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
#endif

    // The call was a candidate but was not expanded.
    const bool callProducesValue = (call->gtReturnType != TYP_VOID);

    if (callProducesValue)
    {
        // Unhook the GT_CALL from its statement by putting a "nothing" node
        // in its place. The "nothing" node is removed later, and the
        // GT_RET_EXPR node picks up the original GT_CALL tree.
        noway_assert(fgMorphStmt->gtStmtExpr == call);
        fgMorphStmt->gtStmtExpr = gtNewNothingNode();
    }

    // Drop the candidate flag, so it can be checked later that every
    // candidate has been tried.
    call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
}

//------------------------------------------------------------------------------
// fgMorphCallInlineHelper: run the inline attempt for a candidate call
//
// Arguments:
//    call   - the inline candidate
//    result - must be in the candidate state on entry; holds the
//             success/failure outcome on return
//
// Notes:
//    On success, the current method's IR has been modified with the
//    inlinee's IR.
//
//    On failure, speculative changes to the current method are undone:
//    every local variable table entry added since the attempt started is
//    reset and lvaCount is restored.

void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
{
    // The result is expected to still be undecided here.
    assert(result->IsCandidate());

    // Reported against the call site, although this is really a budget
    // problem: lvaCount currently covers all caller locals plus those of any
    // prospective callees. Other callees may still be inlined into this
    // caller, and this callee may still be inlined into other callers.
    if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
    {
        result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
        return;
    }

    if (call->IsVirtual())
    {
        result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
        return;
    }

    // impMarkInlineCandidate() is expected to never mark tail prefixed calls
    // or recursive tail calls as inline candidates.
    noway_assert(!call->IsTailPrefixedCall());
    noway_assert(!(call->IsImplicitTailCall() && gtIsRecursiveCall(call)));

    // No inlining at all if the caller's stack frame is marked. This was
    // already checked in impCanInline, but IL instructions seen after that
    // point may have set compNeedSecurityCheck, so it is checked once more.
    if (opts.compNeedSecurityCheck)
    {
        result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
        return;
    }

    // Remember how many locals existed before the inlinee's compiler runs,
    // so that the table can be rolled back on failure.
    const unsigned lclCountBeforeAttempt = lvaCount;

#ifdef DEBUG
    if (verbose)
    {
        printf("Expanding INLINE_CANDIDATE in statement ");
        printTreeID(fgMorphStmt);
        printf(" in BB%02u:\n", compCurBB->bbNum);
        gtDispTree(fgMorphStmt);
        if (call->IsImplicitTailCall())
        {
            printf("Note: candidate is implicit tail call\n");
        }
    }
#endif

    impInlineRoot()->m_inlineStrategy->NoteAttempt(result);

    // Hand the call over to the inlinee compiler.
    fgInvokeInlineeCompiler(call, result);

    if (!result->IsFailure())
    {
        // Disabled diagnostic:
        // printf("After inlining lvaCount=%d.\n", lvaCount);
        return;
    }

    // The attempt failed: undo the changes made in anticipation of inlining.

    // Wipe every local added during the attempt, then re-run the
    // constructor on each wiped entry.
    const unsigned speculativeLclCount = lvaCount - lclCountBeforeAttempt;
    memset(&lvaTable[lclCountBeforeAttempt], 0, speculativeLclCount * sizeof(lvaTable[0]));

    for (unsigned lclNum = lclCountBeforeAttempt; lclNum < lvaCount; lclNum++)
    {
        new (&lvaTable[lclNum], jitstd::placement_t()) LclVarDsc(this);
    }

    lvaCount = lclCountBeforeAttempt;

    // Disabled diagnostic:
    // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
}

//------------------------------------------------------------------------------
// fgCanFastTailCall: check whether a tail call can be optimized as epilog+jmp
//
// Arguments:
//    callee - the tail call to examine
//
// Return Value:
//    true if the call passes every check below, false otherwise.
//    Always false when FEATURE_FASTTAILCALL is not enabled.
//
// Notes:
//    The call is rejected when:
//      - the callee has a RetBuf argument but the caller does not;
//      - a struct argument is found whose class cannot be determined, or
//        that VarTypeIsMultiByteAndCanEnreg reports as not enregisterable;
//      - the callee's argument count exceeds MAX_REG_ARG and is also
//        larger than the caller's argument count.
//
bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
{
#if FEATURE_FASTTAILCALL
    // Getting here means the caller and callee return types are tail call
    // compatible. For structs that can be returned in a register,
    // compRetNativeType is set to the actual return type.
    //
    // callSig is guaranteed to be available for explicit tail calls, but may
    // be missing for implicit ones: a call node marked as an inline candidate
    // can fail to be inlined, in which case fgInline() replaces the return
    // value place holder with the call node using gtCloneExpr(), which
    // currently does not copy/set callSig.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
    if (callee->IsTailPrefixedCall())
    {
        assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
                                            (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
    }
#endif

    // Vararg methods:
    // For a vararg caller the number of arguments passed by its own caller is
    // unknown, but its incoming arg area is certain to be big enough for its
    // fixed args. So a vararg caller may fast tail call other methods as long
    // as the outgoing area the callee needs is bounded by the caller's fixed
    // argument space. A vararg callee is not a problem, since the params
    // being passed to it can be accounted for.

    // Callee args, including implicit and hidden ones. GenericContext and
    // VarargCookie are added to gtCallArgs by the importer, next to the
    // explicit user args, so they are counted by the loop below.
    unsigned calleeArgCount = (callee->gtCallObjp != nullptr) ? 1 : 0; // thisPtr

    if (callee->HasRetBufArg()) // RetBuf
    {
        // A callee RetBuf requires a caller RetBuf; otherwise take the slow route.
        if (info.compRetBuffArg == BAD_VAR_NUM)
        {
            return false;
        }

        calleeArgCount++;
    }

    // Count the user args, bailing out (slow route) as soon as one of them
    // turns out to be a multi-byte param that cannot be passed in a register.
    // Non-standard and secret params passed in registers (e.g. R10, R11) are
    // not counted, since they do not contribute to the outgoing arg size.
    for (GenTreePtr argList = callee->gtCallArgs; argList != nullptr; argList = argList->gtOp.gtOp2)
    {
        calleeArgCount++;

        assert(argList->OperIsList());
        GenTreePtr argNode = argList->gtOp.gtOp1;

        if (!varTypeIsStruct(argNode))
        {
            continue;
        }

        // The actual arg may sit underneath GT_COMMA nodes; step past them.
        while (argNode->gtOper == GT_COMMA)
        {
            argNode = argNode->gtOp.gtOp2;
        }

        // Find the struct's class, so its size and register passability can be queried.
        CORINFO_CLASS_HANDLE structHnd = nullptr;

        if (argNode->OperGet() == GT_OBJ)
        {
            structHnd = argNode->AsObj()->gtClass;
        }
        else if (argNode->IsLocal())
        {
            structHnd = lvaTable[argNode->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
        }

        if (structHnd == nullptr)
        {
            // Class unknown: treat as a multi-byte arg.
            return false;
        }

#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)

        unsigned structSize = 0;
        if (!VarTypeIsMultiByteAndCanEnreg(argNode->TypeGet(), structHnd, &structSize, false))
        {
            return false;
        }

#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
        // On System V/arm64 an arg can be a 2 eightbyte struct passed in two registers;
        // account for the additional register(s) in the callee count.
        // https://github.com/dotnet/coreclr/issues/2666
        // TODO-CQ-Amd64-Unix/arm64:  Structs of size between 9 to 16 bytes are conservatively
        //                            estimated, since they need two registers whereas the caller
        //                            count treats such an arg as one. As a result some calls that
        //                            could technically be optimized are not.
        // NOTE(review): this adds structSize / TARGET_POINTER_SIZE on top of the increment
        //               already done for this arg above - confirm the extra slot is intended.
        if (structSize > TARGET_POINTER_SIZE)
        {
            calleeArgCount += (structSize / TARGET_POINTER_SIZE);
        }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_

#else
        assert(!"Target platform ABI rules regarding passing struct type args in registers");
        unreached();
#endif //_TARGET_AMD64_ || _TARGET_ARM64_
    }

    // At this point every callee arg can be passed in a register and, if
    // passed on the stack, takes exactly one slot of the outgoing arg area.
    // The fast tail call is possible unless the callee passes args on the
    // stack and has more args than the caller.
    //
    // The GC'ness of on-stack args does not have to match, because the arg
    // setup area is marked as non-interruptible for fast tail calls.

    // Caller args, including implicit and hidden ones (i.e. thisPtr, RetBuf,
    // GenericContext, VarargCookie).
    const unsigned callerArgCount = info.compArgsCount;

    return (calleeArgCount <= MAX_REG_ARG) || (callerArgCount >= calleeArgCount);
#else
    return false;
#endif
}

//------------------------------------------------------------------------------
// fgMorphTailCall: transform a GT_CALL tree for helper-assisted tail call
//                  code generation
//
// Arguments:
//    call - the tail call to transform; it is modified in place
//
// Notes:
//    The work done depends on the target:
//
//    _TARGET_ARM_:
//      - the 'this' pointer (if any) is moved onto the regular arg list, with
//        an explicit null check (an indirection) when one is needed;
//      - virtual stub and vtable calls are rewritten as indirect calls;
//      - a placeholder for the call target (marked as living in
//        REG_TAILCALL_ADDR) and the address of the copy-args thunk are put at
//        the front of the arg list;
//      - the call is marked as a varargs tail call.
//
//    _TARGET_XARCH_ (non-legacy backend):
//      - the 'this' pointer (if any) is moved onto the regular arg list, with
//        an explicit null check when one is needed;
//      - AMD64: the stub address (virtual stub calls only), a call target
//        placeholder and the address of the copy-args thunk are put at the
//        front of the arg list;
//      - x86: four special args are appended at the end of the arg list (old
//        stack arg word count, and placeholders for new stack arg count,
//        flags and call target);
//      - the call is marked as a varargs tail call dispatched via helper.
//
//    Other configurations: the call is left untouched (only dumped).
//
void Compiler::fgMorphTailCall(GenTreeCall* call)
{
    JITDUMP("fgMorphTailCall (before):\n");
    DISPTREE(call);

#if defined(_TARGET_ARM_)
    // For the helper-assisted tail calls, we need to push all the arguments
    // into a single list, and then add a few extra at the beginning

    // Check for PInvoke call types that we don't handle in codegen yet.
    assert(!call->IsUnmanaged());
    assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));

    // First move the this pointer (if any) onto the regular arg list
    GenTreePtr thisPtr = NULL;
    if (call->gtCallObjp)
    {
        GenTreePtr objp  = call->gtCallObjp;
        call->gtCallObjp = NULL;

        // A second copy of "this" is needed either to build the explicit null
        // check or, for vtable calls, to build the call address further below.
        if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
        {
            thisPtr      = gtClone(objp, true);
            var_types vt = objp->TypeGet();
            if (thisPtr == NULL)
            {
                // Too complex, so use a temp
                unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
                GenTreePtr asg    = gtNewTempAssign(lclNum, objp);
                if (!call->IsVirtualVtable())
                {
                    // Add an indirection to get the nullcheck
                    GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
                    GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
                    asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
                }
                // objp = COMMA(tmp = "this" [, deref(tmp)], tmp); thisPtr = another use of tmp
                objp    = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
                thisPtr = gtNewLclvNode(lclNum, vt);
            }
            else if (!call->IsVirtualVtable())
            {
                // objp = COMMA(deref(clone of "this"), "this"); the clone is consumed by
                // the indirection, so thisPtr is re-cloned.
                GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
                objp           = gtNewOperNode(GT_COMMA, vt, ind, objp);
                thisPtr        = gtClone(thisPtr, true);
            }

            // The null check (where required) is now explicit in the tree.
            call->gtFlags &= ~GTF_CALL_NULLCHECK;
        }

        call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
    }

    // Add the extra VSD parameter if needed
    CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
    if (call->IsVirtualStub())
    {
        flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;

        GenTreePtr arg;
        if (call->gtCallType == CT_INDIRECT)
        {
            arg = gtClone(call->gtCallAddr, true);
            noway_assert(arg != NULL);
        }
        else
        {
            noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
            ssize_t addr = ssize_t(call->gtStubCallStubAddr);
            arg          = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);

            // Change the call type, so we can add the extra indirection here, rather than in codegen
            call->gtCallAddr         = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
            call->gtStubCallStubAddr = NULL;
            call->gtCallType         = CT_INDIRECT;
        }
        // Add the extra indirection to generate the real target
        call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
        call->gtFlags |= GTF_EXCEPT;

        // And push the stub address onto the list of arguments
        call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
    }
    else if (call->IsVirtualVtable())
    {
        // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER

        noway_assert(thisPtr != NULL);

        // Load the vtable pointer: IND(thisPtr + VPTR_OFFS)
        GenTreePtr add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
        GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
        vtbl->gtFlags |= GTF_EXCEPT;

        unsigned vtabOffsOfIndirection;
        unsigned vtabOffsAfterIndirection;
        info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);

        /* Get the appropriate vtable chunk */

        add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
        vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);

        /* Now the appropriate vtable slot */

        add  = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
        vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);

        // Switch this to a plain indirect call
        call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
        assert(!call->IsVirtual());
        call->gtCallType = CT_INDIRECT;

        call->gtCallAddr   = vtbl;
        call->gtCallCookie = NULL;
        call->gtFlags |= GTF_EXCEPT;
    }

    // Now inject a placeholder for the real call target that codegen
    // will generate
    GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
    codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
    call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);

    // Lastly inject the pointer for the copy routine
    noway_assert(call->callSig != NULL);
    void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
    arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
    call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);

    // It is now a varargs tail call
    // NOTE(review): this is a plain assignment, so any other bits previously held in
    // gtCallMoreFlags are dropped; the XARCH path below uses |= instead - confirm intended.
    call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
    call->gtFlags &= ~GTF_CALL_POP_ARGS;

#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)

    // x86 classic codegen doesn't require any morphing

    // For the helper-assisted tail calls, we need to push all the arguments
    // into a single list, and then add a few extra at the beginning or end.
    //
    // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
    //
    //      JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
    //
    // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
    // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
    // for callTarget here which will be replaced later with callTarget in tail call lowering.
    //
    // For x86, the tailcall helper is defined as:
    //
    //      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
    //      callTarget)
    //
    // Note that the special arguments are on the stack, whereas the function arguments follow
    // the normal convention: there might be register arguments in ECX and EDX. The stack will
    // look like (highest address at the top):
    //      first normal stack argument
    //      ...
    //      last normal stack argument
    //      numberOfOldStackArgs
    //      numberOfNewStackArgs
    //      flags
    //      callTarget
    //
    // Each special arg is 4 bytes.
    //
    // 'flags' is a bitmask where:
    //      1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
    //          callee-saved registers for tailcall functions. Note that the helper assumes
    //          that the callee-saved registers live immediately below EBP, and must have been
    //          pushed in this order: EDI, ESI, EBX.
    //      2 == call target is a virtual stub dispatch.
    //
    // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
    // on the custom calling convention.

    // Check for PInvoke call types that we don't handle in codegen yet.
    assert(!call->IsUnmanaged());
    assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));

    // Don't support tail calling helper methods
    assert(call->gtCallType != CT_HELPER);

    // We come this route only for tail prefixed calls that cannot be dispatched as
    // fast tail calls
    assert(!call->IsImplicitTailCall());
    assert(!fgCanFastTailCall(call));

    // First move the 'this' pointer (if any) onto the regular arg list. We do this because
    // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
    // and thus shift where the 'this' pointer will be passed to a later argument slot. In
    // addition, for all platforms, we are going to change the call into a helper call. Our code
    // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
    // do this transformation, we must explicitly create a null 'this' pointer check, if required,
    // since special 'this' pointer handling will no longer kick in.
    //
    // Some call types, such as virtual vtable calls, require creating a call address expression
    // that involves the "this" pointer. Lowering will sometimes create an embedded statement
    // to create a temporary that is assigned to the "this" pointer expression, and then use
    // that temp to create the call address expression. This temp creation embedded statement
    // will occur immediately before the "this" pointer argument, and then will be used for both
    // the "this" pointer argument as well as the call address expression. In the normal ordering,
    // the embedded statement establishing the "this" pointer temp will execute before both uses
    // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
    // normal call argument list, and insert a placeholder which will hold the call address
    // expression. For non-x86, things are ok, because the order of execution of these is not
    // altered. However, for x86, the call address expression is inserted as the *last* argument
    // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
    // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
    // for those cases where call lowering creates an embedded form temp of "this", we will
    // create a temp here, early, that will later get morphed correctly.

    if (call->gtCallObjp)
    {
        GenTreePtr thisPtr = nullptr;
        GenTreePtr objp    = call->gtCallObjp;
        call->gtCallObjp   = nullptr;

#ifdef _TARGET_X86_
        // Spill a non-local "this" to a temp early for delegate invokes and vtable calls
        // (see the ordering discussion above).
        if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
        {
            // tmp = "this"
            unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
            GenTreePtr asg    = gtNewTempAssign(lclNum, objp);

            // COMMA(tmp = "this", tmp)
            var_types  vt  = objp->TypeGet();
            GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
            thisPtr        = gtNewOperNode(GT_COMMA, vt, asg, tmp);

            objp = thisPtr;
        }
#endif // _TARGET_X86_

#if defined(_TARGET_X86_)
        // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
        // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
        // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
        if (call->NeedsNullCheck() || call->IsVirtualStub())
#else
        if (call->NeedsNullCheck())
#endif // defined(_TARGET_X86_)
        {
            // clone "this" if "this" has no side effects.
            if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
            {
                thisPtr = gtClone(objp, true);
            }

            var_types vt = objp->TypeGet();
            if (thisPtr == nullptr)
            {
                // create a temp if either "this" has side effects or "this" is too complex to clone.

                // tmp = "this"
                unsigned   lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
                GenTreePtr asg    = gtNewTempAssign(lclNum, objp);

                // COMMA(tmp = "this", deref(tmp))
                GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
                GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
                asg            = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);

                // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
                thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
            }
            else
            {
                // thisPtr = COMMA(deref("this"), "this")
                GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
                thisPtr        = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
            }

            // The null check is now explicit in the tree.
            call->gtFlags &= ~GTF_CALL_NULLCHECK;
        }
        else
        {
            thisPtr = objp;
        }

        // During rationalization tmp="this" and null check will
        // materialize as embedded stmts in right execution order.
        assert(thisPtr != nullptr);
        call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
    }

#if defined(_TARGET_AMD64_)

    // Add the extra VSD parameter to arg list in case of VSD calls.
    // Tail call arg copying thunk will move this extra VSD parameter
    // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
    // in Stublinkerx86.cpp for more details.
    CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
    if (call->IsVirtualStub())
    {
        GenTreePtr stubAddrArg;

        flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;

        if (call->gtCallType == CT_INDIRECT)
        {
            stubAddrArg = gtClone(call->gtCallAddr, true);
            noway_assert(stubAddrArg != nullptr);
        }
        else
        {
            noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);

            ssize_t addr = ssize_t(call->gtStubCallStubAddr);
            stubAddrArg  = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
        }

        // Push the stub address onto the list of arguments
        call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
    }

    // Now inject a placeholder for the real call target that Lower phase will generate.
    GenTreePtr arg   = gtNewIconNode(0, TYP_I_IMPL);
    call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);

    // Inject the pointer for the copy routine to be used for struct copying
    noway_assert(call->callSig != nullptr);
    void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
    arg               = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
    call->gtCallArgs  = gtNewListNode(arg, call->gtCallArgs);

#else // !_TARGET_AMD64_

    // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
    // append to the list.
    GenTreeArgList** ppArg = &call->gtCallArgs;
    for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
    {
        ppArg = (GenTreeArgList**)&args->gtOp2;
    }
    assert(ppArg != nullptr);
    assert(*ppArg == nullptr);

    // Incoming stack argument size of the current method, in register-sized words:
    // the total incoming arg size minus the part passed in integer argument registers.
    unsigned nOldStkArgsWords =
        (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
    GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
    *ppArg        = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
    ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);

    // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
    // The constant will be replaced.
    GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
    *ppArg        = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
    ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);

    // Inject a placeholder for the flags.
    // The constant will be replaced.
    GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
    *ppArg        = gtNewListNode(arg1, nullptr);
    ppArg         = (GenTreeArgList**)&((*ppArg)->gtOp2);

    // Inject a placeholder for the real call target that the Lowering phase will generate.
    // The constant will be replaced.
    GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
    *ppArg        = gtNewListNode(arg0, nullptr);

#endif // !_TARGET_AMD64_

    // It is now a varargs tail call dispatched via helper.
    call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
    call->gtFlags &= ~GTF_CALL_POP_ARGS;

#endif // _TARGET_*

    JITDUMP("fgMorphTailCall (after):\n");
    DISPTREE(call);
}

//------------------------------------------------------------------------------
// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
//
//
// Arguments:
//    block  - basic block ending with a recursive fast tail call
//    recursiveTailCall - recursive tail call to transform
//
// Notes:
//    The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
//    The call statement is replaced by statements that evaluate the call's arguments and store them
//    into the corresponding parameters of the method being compiled; the block then becomes a
//    BBJ_ALWAYS jump to the first non-scratch block of the method.

void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
{
    assert(recursiveTailCall->IsTailCallConvertibleToLoop());

    // The call must be the expression of the block's final statement. That statement anchors
    // every insertion below and is deleted once all replacement statements are in place.
    GenTreePtr callStmt = block->lastStmt();
    assert(callStmt->gtStmt.gtStmtExpr == recursiveTailCall);

    // Setup statements always go directly in front of the call statement, and every new
    // statement is tagged with the call's IL offset.
    GenTreePtr setupInsertionPoint = callStmt;
    IL_OFFSETX ilOffset            = callStmt->gtStmt.gtStmtILoffsx;

    // The 'this' tree (when it is a real tree rather than a nothing/placeholder node)
    // is hoisted into its own statement.
    GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
    if (thisArg != nullptr)
    {
        const bool isRealTree = !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode();
        if (isRealTree)
        {
            GenTreePtr thisSetupStmt = gtNewStmt(thisArg, ilOffset);
            fgInsertStmtBefore(block, setupInsertionPoint, thisSetupStmt);
        }
    }

    // Argument trees may read the caller's parameters, so a parameter must not be overwritten
    // while some argument that depends on it is still unevaluated. Arguments that might involve
    // parameters are therefore first stored to temps, and only then are the temps copied into
    // the parameters. tmpInsertionPoint and paramInsertionPoint record where the next temp
    // store and the next parameter store, respectively, have to be inserted.

    // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
    // while the second call argument (const 1) doesn't.
    // Basic block before tail recursion elimination:
    //  ***** BB04, stmt 1 (top level)
    //  [000037] ------------             *  stmtExpr  void  (top level) (IL 0x00A...0x013)
    //  [000033] --C - G------ - \--*  call      void   RecursiveMethod
    //  [000030] ------------ | / --*  const     int - 1
    //  [000031] ------------arg0 in rcx + --*  +int
    //  [000029] ------------ | \--*  lclVar    int    V00 arg1
    //  [000032] ------------arg1 in rdx    \--*  const     int    1
    //
    //
    //  Basic block after tail recursion elimination :
    //  ***** BB04, stmt 1 (top level)
    //  [000051] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
    //  [000030] ------------ | / --*  const     int - 1
    //  [000031] ------------ | / --*  +int
    //  [000029] ------------ | | \--*  lclVar    int    V00 arg1
    //  [000050] - A----------             \--* = int
    //  [000049] D------N----                \--*  lclVar    int    V02 tmp0
    //
    //  ***** BB04, stmt 2 (top level)
    //  [000055] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
    //  [000052] ------------ | / --*  lclVar    int    V02 tmp0
    //  [000054] - A----------             \--* = int
    //  [000053] D------N----                \--*  lclVar    int    V00 arg0

    //  ***** BB04, stmt 3 (top level)
    //  [000058] ------------             *  stmtExpr  void  (top level) (IL 0x00A... ? ? ? )
    //  [000032] ------------ | / --*  const     int    1
    //  [000057] - A----------             \--* = int
    //  [000056] D------N----                \--*  lclVar    int    V01 arg1

    GenTreePtr tmpInsertionPoint   = callStmt;
    GenTreePtr paramInsertionPoint = callStmt;

    // Walk the early argument list. Its entries are either setup trees for late arguments or
    // the actual arguments themselves. The list does not contain 'this', so numbering starts
    // at 1 when a 'this' argument exists; that keeps the lookup through gtArgEntryByArgNum right.
    int argNum = (thisArg != nullptr) ? 1 : 0;
    for (GenTreeArgList* earlyList = recursiveTailCall->gtCallArgs; earlyList != nullptr;
         (argNum++, earlyList = earlyList->Rest()))
    {
        GenTreePtr argTree = earlyList->Current();
        if (argTree->IsNothingNode() || argTree->IsArgPlaceHolderNode())
        {
            // Nothing to evaluate for this slot.
            continue;
        }

        if ((argTree->gtFlags & GTF_LATE_ARG) != 0)
        {
            // A setup tree: hoist it into a statement of its own.
            GenTreePtr setupStmt = gtNewStmt(argTree, ilOffset);
            fgInsertStmtBefore(block, setupInsertionPoint, setupStmt);
            continue;
        }

        // An actual argument: store it into the matching caller parameter.
        fgArgTabEntryPtr argEntry = gtArgEntryByArgNum(recursiveTailCall, argNum);
        GenTreePtr paramStmt = fgAssignRecursiveCallArgToCallerParam(argTree, argEntry, block, ilOffset,
                                                                     tmpInsertionPoint, paramInsertionPoint);
        if ((paramStmt != nullptr) && (tmpInsertionPoint == callStmt))
        {
            // From now on every temp store is placed ahead of the first parameter store.
            tmpInsertionPoint = paramStmt;
        }
    }

    // Walk the late argument list. Every entry is an actual argument that has to be stored
    // into the matching caller parameter.
    int             lateIndex = 0;
    GenTreeArgList* lateList  = recursiveTailCall->gtCallLateArgs;
    while (lateList != nullptr)
    {
        GenTreePtr       argTree  = lateList->Current();
        fgArgTabEntryPtr argEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateIndex);
        GenTreePtr paramStmt = fgAssignRecursiveCallArgToCallerParam(argTree, argEntry, block, ilOffset,
                                                                     tmpInsertionPoint, paramInsertionPoint);

        if ((paramStmt != nullptr) && (tmpInsertionPoint == callStmt))
        {
            // From now on every temp store is placed ahead of the first parameter store.
            tmpInsertionPoint = paramStmt;
        }

        lateIndex++;
        lateList = lateList->Rest();
    }

    // Methods containing starg.s 0 or ldarga.s 0 get a special local (lvaArg0Var) so that
    // compThisArg stays immutable. That local is normally initialized in the fgFirstBBScratch
    // block, which is assumed to have no predecessors and so will not be part of the loop;
    // refresh the special local from compThisArg here instead.
    const bool hasSeparateArg0Var = !info.compIsStatic && (lvaArg0Var != info.compThisArg);
    if (hasSeparateArg0Var)
    {
        var_types  thisType    = lvaTable[info.compThisArg].TypeGet();
        GenTreePtr arg0Dest    = gtNewLclvNode(lvaArg0Var, thisType);
        GenTreePtr arg0Src     = gtNewLclvNode(info.compThisArg, thisType);
        GenTreePtr arg0Asg     = gtNewAssignNode(arg0Dest, arg0Src);
        GenTreePtr arg0AsgStmt = gtNewStmt(arg0Asg, ilOffset);
        fgInsertStmtBefore(block, paramInsertionPoint, arg0AsgStmt);
    }

    // The call statement itself is no longer needed.
    fgRemoveStmt(block, callStmt);

    // Close the loop: jump back to the first block, skipping the scratch block if there is one.
    block->bbJumpKind = BBJ_ALWAYS;
    if (fgFirstBBisScratch())
    {
        block->bbJumpDest = fgFirstBB->bbNext;
    }
    else
    {
        block->bbJumpDest = fgFirstBB;
    }
    fgAddRefPred(block->bbJumpDest, block);
    block->bbFlags &= ~BBF_HAS_JMP;
}

//------------------------------------------------------------------------------
// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
//
//
// Arguments:
//    arg  -  argument to assign
//    argTabEntry  -  argument table entry corresponding to arg
//    block  -  basic block the call is in
//    callILOffset  -  IL offset of the call
//    tmpAssignmentInsertionPoint  -  tree before which temp assignment should be inserted (if necessary)
//    paramAssignmentInsertionPoint  -  tree before which parameter assignment should be inserted
//
// Return Value:
//    parameter assignment statement if one was inserted; nullptr otherwise.
//
// Notes:
//    Up to two statements are inserted into 'block': an optional "temp = arg" statement before
//    tmpAssignmentInsertionPoint and a "param = temp-or-arg" statement before
//    paramAssignmentInsertionPoint. Nothing is inserted when the argument is the very parameter
//    it would be assigned to. Struct-typed arguments are not supported (noway_assert).

GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr       arg,
                                                           fgArgTabEntryPtr argTabEntry,
                                                           BasicBlock*      block,
                                                           IL_OFFSETX       callILOffset,
                                                           GenTreePtr       tmpAssignmentInsertionPoint,
                                                           GenTreePtr       paramAssignmentInsertionPoint)
{
    // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
    // some argument trees may reference parameters directly.

    // argInTemp stays null until we know of a tree that can be stored into the parameter directly.
    GenTreePtr argInTemp = nullptr;
    // The argument number doubles as the local variable number of the caller's parameter.
    unsigned originalArgNum        = argTabEntry->argNum;
    bool     needToAssignParameter = true;

    // TODO-CQ: enable calls with struct arguments passed in registers.
    noway_assert(!varTypeIsStruct(arg->TypeGet()));

    if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
    {
        // The argument is already assigned to a temp or is a const.
        argInTemp = arg;
    }
    else if (arg->OperGet() == GT_LCL_VAR)
    {
        unsigned   lclNum = arg->AsLclVar()->gtLclNum;
        LclVarDsc* varDsc = &lvaTable[lclNum];
        if (!varDsc->lvIsParam)
        {
            // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
            argInTemp = arg;
        }
        else if (lclNum == originalArgNum)
        {
            // The argument is the same parameter local that we were about to assign so
            // we can skip the assignment.
            needToAssignParameter = false;
        }
        // Otherwise the argument is a different parameter: argInTemp stays null so that
        // the value is copied through a fresh temp below.
    }

    // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
    // any caller parameters. Some common cases are handled above but we may be able to eliminate
    // more temp assignments.

    GenTreePtr paramAssignStmt = nullptr;
    if (needToAssignParameter)
    {
        if (argInTemp == nullptr)
        {
            // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
            // TODO: we can avoid a temp assignment if we can prove that the argument tree
            // doesn't involve any caller parameters.
            unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));

            // The trees below are built by hand with gtNewLclvNode/gtNewAssignNode, so nothing in
            // this function would otherwise record the temp's type in its descriptor. Set it
            // explicitly so the descriptor agrees with the nodes that reference the temp.
            lvaTable[tmpNum].lvType = arg->gtType;

            GenTreePtr tempSrc       = arg;
            GenTreePtr tempDest      = gtNewLclvNode(tmpNum, tempSrc->gtType);
            GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
            GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
            fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);

            // The parameter is assigned from a fresh use of the temp.
            argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
        }

        // Now assign the temp to the parameter.
        LclVarDsc* paramDsc = lvaTable + originalArgNum;
        assert(paramDsc->lvIsParam);
        GenTreePtr paramDest       = gtNewLclvNode(originalArgNum, paramDsc->lvType);
        GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
        paramAssignStmt            = gtNewStmt(paramAssignNode, callILOffset);

        fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
    }
    return paramAssignStmt;
}

/*****************************************************************************
 *
 *  Transform the given GT_CALL tree for code generation.
 */

GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
{
    if (call->CanTailCall())
    {
        // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
        assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());

        // It cannot be an inline candidate
        assert(!call->IsInlineCandidate());

        const char* szFailReason   = nullptr;
        bool        hasStructParam = false;
        if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
        {
            szFailReason = "Might turn into an intrinsic";
        }

        if (opts.compNeedSecurityCheck)
        {
            szFailReason = "Needs security check";
        }
        else if (compLocallocUsed)
        {
            szFailReason = "Localloc used";
        }
#ifdef _TARGET_AMD64_
        // Needed for Jit64 compat.
        // In future, enabling tail calls from methods that need GS cookie check
        // would require codegen side work to emit GS cookie check before a tail
        // call.
        else if (getNeedsGSSecurityCookie())
        {
            szFailReason = "GS Security cookie check";
        }
#endif
#ifdef DEBUG
        // DDB 99324: Just disable tailcall under compGcChecks stress mode.
        else if (opts.compGcChecks)
        {
            szFailReason = "GcChecks";
        }
#endif
#if FEATURE_TAILCALL_OPT
        else
        {
            // We are still not sure whether it can be a tail call. Because, when converting
            // a call to an implicit tail call, we must check that there are no locals with
            // their address taken.  If this is the case, we have to assume that the address
            // has been leaked and the current stack frame must live until after the final
            // call.

            // Verify that none of vars has lvHasLdAddrOp or lvAddrExposed bit set. Note
            // that lvHasLdAddrOp is much more conservative.  We cannot just base it on
            // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
            // during morph stage. The reason for also checking lvAddrExposed is that in case
            // of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
            // The combination of lvHasLdAddrOp and lvAddrExposed though conservative allows us
            // never to be incorrect.
            //
            // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
            // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
            // is set. This avoids the need for iterating through all lcl vars of the current
            // method.  Right now throughout the code base we are not consistently using 'set'
            // method to set lvHasLdAddrOp and lvAddrExposed flags.
            unsigned   varNum;
            LclVarDsc* varDsc;
            bool       hasAddrExposedVars     = false;
            bool       hasStructPromotedParam = false;
            bool       hasPinnedVars          = false;

            for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
            {
                // If the method is marked as an explicit tail call we will skip the
                // following three hazard checks.
                // We still must check for any struct parameters and set 'hasStructParam'
                // so that we won't transform the recursive tail call into a loop.
                //
                if (call->IsImplicitTailCall())
                {
                    if (varDsc->lvHasLdAddrOp)
                    {
                        hasAddrExposedVars = true;
                        break;
                    }
                    if (varDsc->lvAddrExposed)
                    {
                        if (lvaIsImplicitByRefLocal(varNum))
                        {
                            // The address of the implicit-byref is a non-address use of the pointer parameter.
                        }
                        else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
                        {
                            // The address of the implicit-byref's field is likewise a non-address use of the pointer
                            // parameter.
                        }
                        else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
                        {
                            // This temp was used for struct promotion bookkeeping.  It will not be used, and will have
                            // its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
                            assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
                            assert(fgGlobalMorph);
                        }
                        else
                        {
                            hasAddrExposedVars = true;
                            break;
                        }
                    }
                    if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
                    {
                        hasStructPromotedParam = true;
                        break;
                    }
                    if (varDsc->lvPinned)
                    {
                        // A tail call removes the method from the stack, which means the pinning
                        // goes away for the callee.  We can't allow that.
                        hasPinnedVars = true;
                        break;
                    }
                }
                if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
                {
                    hasStructParam = true;
                    // This prevents transforming a recursive tail call into a loop
                    // but doesn't prevent tail call optimization so we need to
                    // look at the rest of parameters.
                    continue;
                }
            }

            if (hasAddrExposedVars)
            {
                szFailReason = "Local address taken";
            }
            if (hasStructPromotedParam)
            {
                szFailReason = "Has Struct Promoted Param";
            }
            if (hasPinnedVars)
            {
                szFailReason = "Has Pinned Vars";
            }
        }
#endif // FEATURE_TAILCALL_OPT

        if (varTypeIsStruct(call))
        {
            fgFixupStructReturn(call);
        }

        var_types callType = call->TypeGet();

        // We have to ensure to pass the incoming retValBuf as the
        // outgoing one. Using a temp will not do as this function will
        // not regain control to do the copy.

        if (info.compRetBuffArg != BAD_VAR_NUM)
        {
            noway_assert(callType == TYP_VOID);
            GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
            if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
            {
                szFailReason = "Need to copy return buffer";
            }
        }

        // If this is an opportunistic tail call and cannot be dispatched as
        // fast tail call, go the non-tail call route.  This is done for perf
        // reason.
        //
        // Avoid the cost of determining whether can be dispatched as fast tail
        // call if we already know that tail call cannot be honored for other
        // reasons.
        bool canFastTailCall = false;
        if (szFailReason == nullptr)
        {
            canFastTailCall = fgCanFastTailCall(call);
            if (!canFastTailCall)
            {
                // Implicit or opportunistic tail calls are always dispatched via fast tail call
                // mechanism and never via tail call helper for perf.
                if (call->IsImplicitTailCall())
                {
                    szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
                }
#ifndef LEGACY_BACKEND
                else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
                {
                    // If we are here, it means that the call is an explicitly ".tail" prefixed and cannot be
                    // dispatched as a fast tail call.

                    // Methods with non-standard args will have indirection cell or cookie param passed
                    // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
                    // tail calling the target method and hence ".tail" prefix on such calls needs to be
                    // ignored.
                    //
                    // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
                    // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
                    // This is done by adding stubAddr as an additional arg before the original list of
                    // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
                    // in Stublinkerx86.cpp.
                    szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
                                   "called via helper";
                }
#ifdef _TARGET_ARM64_
                else
                {
                    // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
                    // So, bail out if we can't make fast tail call.
                    szFailReason = "Non-qualified fast tail call";
                }
#endif
#endif // LEGACY_BACKEND
            }
        }

        // Clear these flags before calling fgMorphCall() to avoid recursion.
        bool isTailPrefixed = call->IsTailPrefixedCall();
        call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;

#if FEATURE_TAILCALL_OPT
        call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
#endif

#ifdef FEATURE_PAL
        if (!canFastTailCall && szFailReason == nullptr)
        {
            szFailReason = "Non fast tail calls disabled for PAL based systems.";
        }
#endif // FEATURE_PAL

        if (szFailReason != nullptr)
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("\nRejecting tail call late for call ");
                printTreeID(call);
                printf(": %s\n", szFailReason);
            }
#endif

            // for non user funcs, we have no handles to report
            info.compCompHnd->reportTailCallDecision(nullptr,
                                                     (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
                                                     isTailPrefixed, TAILCALL_FAIL, szFailReason);

            goto NO_TAIL_CALL;
        }

#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
        // We enable shared-ret tail call optimization for recursive calls even if
        // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
        if (gtIsRecursiveCall(call))
#endif
        {
            // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
            // but if the call falls through to a ret, and we are doing a tailcall, change it here.
            if (compCurBB->bbJumpKind != BBJ_RETURN)
            {
                compCurBB->bbJumpKind = BBJ_RETURN;
            }
        }

        // Set this flag before calling fgMorphCall() to prevent inlining this call.
        call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;

        bool fastTailCallToLoop = false;
#if FEATURE_TAILCALL_OPT
        // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
        // or return type is a struct that can be passed in a register.
        //
        // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
        // hidden generic context param or through keep alive thisptr), then while transforming a recursive
        // call to such a method requires that the generic context stored on stack slot be updated.  Right now,
        // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
        // a recursive call into a loop.  Another option is to modify gtIsRecursiveCall() to check that the
        // generic type parameters of both caller and callee generic method are the same.
        if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
            !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
        {
            call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
            fastTailCallToLoop = true;
        }
#endif

        // Do some target-specific transformations (before we process the args, etc.)
        // This is needed only for tail prefixed calls that cannot be dispatched as
        // fast calls.
        if (!canFastTailCall)
        {
            fgMorphTailCall(call);
        }

        // Implementation note : If we optimize tailcall to do a direct jump
        // to the target function (after stomping on the return address, etc),
        // without using CORINFO_HELP_TAILCALL, we have to make certain that
        // we don't starve the hijacking logic (by stomping on the hijacked
        // return address etc).

        // At this point, we are committed to do the tailcall.
        compTailCallUsed = true;

        CorInfoTailCall tailCallResult;

        if (fastTailCallToLoop)
        {
            tailCallResult = TAILCALL_RECURSIVE;
        }
        else if (canFastTailCall)
        {
            tailCallResult = TAILCALL_OPTIMIZED;
        }
        else
        {
            tailCallResult = TAILCALL_HELPER;
        }

        // for non user funcs, we have no handles to report
        info.compCompHnd->reportTailCallDecision(nullptr,
                                                 (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
                                                 isTailPrefixed, tailCallResult, nullptr);

        // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID.
        // to avoid doing any extra work for the return value.
        call->gtType = TYP_VOID;

#ifdef DEBUG
        if (verbose)
        {
            printf("\nGTF_CALL_M_TAILCALL bit set for call ");
            printTreeID(call);
            printf("\n");
            if (fastTailCallToLoop)
            {
                printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
                printTreeID(call);
                printf("\n");
            }
        }
#endif

        GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;

#ifdef DEBUG
        // Tail call needs to be in one of the following IR forms
        //    Either a call stmt or
        //    GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
        //    var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
        //    GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
        // In the above,
        //    GT_CASTS may be nested.
        genTreeOps stmtOper = stmtExpr->gtOper;
        if (stmtOper == GT_CALL)
        {
            noway_assert(stmtExpr == call);
        }
        else
        {
            noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
            GenTreePtr treeWithCall;
            if (stmtOper == GT_RETURN)
            {
                treeWithCall = stmtExpr->gtGetOp1();
            }
            else if (stmtOper == GT_COMMA)
            {
                // Second operation must be nop.
                noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
                treeWithCall = stmtExpr->gtGetOp1();
            }
            else
            {
                treeWithCall = stmtExpr->gtGetOp2();
            }

            // Peel off casts
            while (treeWithCall->gtOper == GT_CAST)
            {
                noway_assert(!treeWithCall->gtOverflow());
                treeWithCall = treeWithCall->gtGetOp1();
            }

            noway_assert(treeWithCall == call);
        }
#endif

        // For void calls, we would have created a GT_CALL in the stmt list.
        // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
        // For calls returning structs, we would have a void call, followed by a void return.
        // For debuggable code, it would be an assignment of the call to a temp
        // We want to get rid of any of this extra trees, and just leave
        // the call.
        GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;

#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
        // Legacy Jit64 Compat:
        // There could be any number of GT_NOPs between tail call and GT_RETURN.
        // That is tail call pattern could be one of the following:
        //  1) tail.call, nop*, ret
        //  2) tail.call, nop*, pop, nop*, ret
        //  3) var=tail.call, nop*, ret(var)
        //  4) var=tail.call, nop*, pop, ret
        //  5) comma(tail.call, nop), nop*, ret
        //
        // See impIsTailCallILPattern() for details on tail call IL patterns
        // that are supported.
        if (stmtExpr->gtOper != GT_RETURN)
        {
            // First delete all GT_NOPs after the call
            GenTreeStmt* morphStmtToRemove = nullptr;
            while (nextMorphStmt != nullptr)
            {
                GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
                if (!nextStmtExpr->IsNothingNode())
                {
                    break;
                }

                morphStmtToRemove = nextMorphStmt;
                nextMorphStmt     = morphStmtToRemove->gtNextStmt;
                fgRemoveStmt(compCurBB, morphStmtToRemove);
            }

            // Check to see if there is a pop.
            // Since tail call is honored, we can get rid of the stmt corresponding to pop.
            if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
            {
                // Note that pop opcode may or may not result in a new stmt (for details see
                // impImportBlockCode()). Hence, it is not possible to assert about the IR
                // form generated by pop but pop tree must be side-effect free so that we can
                // delete it safely.
                GenTreeStmt* popStmt = nextMorphStmt;
                nextMorphStmt        = nextMorphStmt->gtNextStmt;

                // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
                // the constituent nodes.
                GenTreePtr popExpr          = popStmt->gtStmtExpr;
                bool       isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
                if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
                {
                    isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
                                       ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
                }
                noway_assert(isSideEffectFree);
                fgRemoveStmt(compCurBB, popStmt);
            }

            // Next delete any GT_NOP nodes after pop
            while (nextMorphStmt != nullptr)
            {
                GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
                if (!nextStmtExpr->IsNothingNode())
                {
                    break;
                }

                morphStmtToRemove = nextMorphStmt;
                nextMorphStmt     = morphStmtToRemove->gtNextStmt;
                fgRemoveStmt(compCurBB, morphStmtToRemove);
            }
        }
#endif // !FEATURE_CORECLR && _TARGET_AMD64_

        // Delete GT_RETURN  if any
        if (nextMorphStmt != nullptr)
        {
            GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
            noway_assert(retExpr->gtOper == GT_RETURN);

            // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
            // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
            if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
            {
                noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
                noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
                             retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
            }

            fgRemoveStmt(compCurBB, nextMorphStmt);
        }

        fgMorphStmt->gtStmtExpr = call;

        // Tail call via helper: The VM can't use return address hijacking if we're
        // not going to return and the helper doesn't have enough info to safely poll,
        // so we poll before the tail call, if the block isn't already safe.  Since
        // tail call via helper is a slow mechanism it doesn't matter whether we emit
        // a GC poll.  This is done to be in parity with Jit64. Also this avoids GC info
        // size increase if almost all methods are expected to be tail calls (e.g. F#).
        //
        // Note that we can avoid emitting GC-poll if we know that the current BB is
        // dominated by a GC-SafePoint block.  But we don't have dominator info at this
        // point.  One option is to just add a placeholder node for GC-poll (e.g. GT_GCPOLL)
        // here and remove it in lowering if the block is dominated by a GC-SafePoint.  For
        // now it is not clear whether optimizing slow tail calls is worth the effort.  As a
        // low cost check, we check whether the first and current basic blocks are
        // GC-SafePoints.
        //
        // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
        // is going to mark the method as fully interruptible if the block containing this tail
        // call is reachable without executing any call.
        if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
            !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
        {
            // We didn't insert a poll block, so we need to morph the call now
            // (Normally it will get morphed when we get to the split poll block)
            GenTreePtr temp = fgMorphCall(call);
            noway_assert(temp == call);
        }

        // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
        // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
        //
        // Fast tail call: in case of fast tail calls, we need a jmp epilog and
        // hence mark it as BBJ_RETURN with BBF_JMP flag set.
        noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);

        if (canFastTailCall)
        {
            compCurBB->bbFlags |= BBF_HAS_JMP;
        }
        else
        {
            compCurBB->bbJumpKind = BBJ_THROW;
        }

        // For non-void calls, we return a place holder which will be
        // used by the parent GT_RETURN node of this call.

        GenTree* result = call;
        if (callType != TYP_VOID && info.compRetType != TYP_VOID)
        {
#ifdef FEATURE_HFA
            // Return a dummy node, as the return is already removed.
            if (callType == TYP_STRUCT)
            {
                // This is a HFA, use float 0.
                callType = TYP_FLOAT;
            }
#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
            // Return a dummy node, as the return is already removed.
            if (varTypeIsStruct(callType))
            {
                // This is a register-returned struct. Return a 0.
                // The actual return registers are hacked in lower and the register allocator.
                callType = TYP_INT;
            }
#endif
#ifdef FEATURE_SIMD
            // Return a dummy node, as the return is already removed.
            if (varTypeIsSIMD(callType))
            {
                callType = TYP_DOUBLE;
            }
#endif
            result = gtNewZeroConNode(genActualType(callType));
            result = fgMorphTree(result);
        }

        return result;
    }

NO_TAIL_CALL:

    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
        (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
#ifdef FEATURE_READYTORUN_COMPILER
         || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
#endif
             ) &&
        (call == fgMorphStmt->gtStmtExpr))
    {
        // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
        // Transform it into a null check.

        GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;

        GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
        nullCheck->gtFlags |= GTF_EXCEPT;

        return fgMorphTree(nullCheck);
    }

    noway_assert(call->gtOper == GT_CALL);

    //
    // Only count calls once (only in the global morph phase)
    //
    if (fgGlobalMorph)
    {
        if (call->gtCallType == CT_INDIRECT)
        {
            optCallCount++;
            optIndirectCallCount++;
        }
        else if (call->gtCallType == CT_USER_FUNC)
        {
            optCallCount++;
            if (call->IsVirtual())
            {
                optIndirectCallCount++;
            }
        }
    }

    // Couldn't inline - remember that this BB contains method calls

    // If this is a 'regular' call, mark the basic block as
    // having a call (for computing full interruptibility).
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_AMD64_
    // Amd64 note: If this is a fast tail call then don't count it as a call
    // since we don't insert GC-polls but instead make the method fully GC
    // interruptible.
    if (!call->IsFastTailCall())
#endif
    {
        if (call->gtCallType == CT_INDIRECT)
        {
            compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
        }
        else if (call->gtCallType == CT_USER_FUNC)
        {
            if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
            {
                compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
            }
        }
        // otherwise we have a CT_HELPER
    }

    // Morph Type.op_Equality and Type.op_Inequality
    // We need to do this before the arguments are morphed
    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
    {
        CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);

        genTreeOps simpleOp = GT_CALL;
        if (methodID == CORINFO_INTRINSIC_TypeEQ)
        {
            simpleOp = GT_EQ;
        }
        else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
        {
            simpleOp = GT_NE;
        }

        if (simpleOp == GT_EQ || simpleOp == GT_NE)
        {
            noway_assert(call->TypeGet() == TYP_INT);

            // Check for GetClassFromHandle(handle) and obj.GetType() both of which will only return RuntimeType
            // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
            // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
            // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
            // in RuntimeTypeHandle::TypeEquals. If this invariant would ever be broken, we need to remove the
            // optimization below.

            GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
            GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;

            if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
            {
                GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);

                // fgMorphSmpOp will further optimize the following patterns:
                //  1. typeof(...) == typeof(...)
                //  2. typeof(...) == obj.GetType()
                return fgMorphTree(compare);
            }
        }
    }

    // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
    GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
                                   // copy-back).
    unsigned             retValTmpNum = BAD_VAR_NUM;
    CORINFO_CLASS_HANDLE structHnd    = nullptr;
    if (call->HasRetBufArg() &&
        call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
    {
        // We're enforcing the invariant that return buffer pointers (at least for
        // struct return types containing GC pointers) are never pointers into the heap.
        // The large majority of cases are address of local variables, which are OK.
        // Otherwise, allocate a local of the given struct type, pass its address,
        // then assign from that into the proper destination.  (We don't need to do this
        // if we're passing the caller's ret buff arg to the callee, since the caller's caller
        // will maintain the same invariant.)

        GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
        assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
        if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
        {
            // We'll exempt helper calls from this, assuming that the helper implementation
            // follows the old convention, and does whatever barrier is required.
            if (call->gtCallType != CT_HELPER)
            {
                structHnd = call->gtRetClsHnd;
                if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
                    !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
                      dest->gtLclVar.gtLclNum == info.compRetBuffArg))
                {
                    origDest = dest;

                    retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
                    lvaSetStruct(retValTmpNum, structHnd, true);
                    dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
                }
            }
        }

        call->gtCallArgs->gtOp.gtOp1 = dest;
    }

    /* Process the "normal" argument list */
    call = fgMorphArgs(call);
    noway_assert(call->gtOper == GT_CALL);

    // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
    // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
    if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
    {
        GenTree* value = gtArgEntryByArgNum(call, 2)->node;
        if (value->IsIntegralConst(0))
        {
            assert(value->OperGet() == GT_CNS_INT);

            GenTree* arr   = gtArgEntryByArgNum(call, 0)->node;
            GenTree* index = gtArgEntryByArgNum(call, 1)->node;

            // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
            // the spill trees as well if necessary.
            GenTreeOp* argSetup = nullptr;
            for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
            {
                GenTree* const arg = earlyArgs->Current();
                if (arg->OperGet() != GT_ASG)
                {
                    continue;
                }

                assert(arg != arr);
                assert(arg != index);

                arg->gtFlags &= ~GTF_LATE_ARG;

                GenTree* op1 = argSetup;
                if (op1 == nullptr)
                {
                    op1 = gtNewNothingNode();
#if DEBUG
                    op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
                }

                argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);

#if DEBUG
                argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
            }

#ifdef DEBUG
            auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
                (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
                return WALK_CONTINUE;
            };

            fgWalkTreePost(&arr, resetMorphedFlag);
            fgWalkTreePost(&index, resetMorphedFlag);
            fgWalkTreePost(&value, resetMorphedFlag);
#endif // DEBUG

            GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
            GenTree* const arrIndexNode   = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
            GenTree* const arrStore       = gtNewAssignNode(arrIndexNode, value);
            arrStore->gtFlags |= GTF_ASG;

            GenTree* result = fgMorphTree(arrStore);
            if (argSetup != nullptr)
            {
                result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
#if DEBUG
                result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
            }

            return result;
        }
    }

    // Optimize get_ManagedThreadId(get_CurrentThread)
    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
        info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
    {
        noway_assert(origDest == nullptr);
        noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);

        GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;

        if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
            info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
                CORINFO_INTRINSIC_GetCurrentManagedThread)
        {
            // substitute expression with call to helper
            GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
            JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
            return fgMorphTree(newCall);
        }
    }

    if (origDest != nullptr)
    {
        GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
        // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
        // var, which would allow the whole assignment to be optimized away to a NOP.  So in that case, make the
        // origDest into a comma that uses the var.  Note that the var doesn't have to be a temp for this to
        // be correct.
        if (origDest->OperGet() == GT_ASG)
        {
            if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
            {
                GenTreePtr var = origDest->gtOp.gtOp1;
                origDest       = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
                                         gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
            }
        }
        GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
        copyBlk            = fgMorphTree(copyBlk);
        GenTree* result    = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
#ifdef DEBUG
        result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
        return result;
    }

    if (call->IsNoReturn())
    {
        //
        // If we know that the call does not return then we can set fgRemoveRestOfBlock
        // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
        // As a result the compiler won't need to preserve live registers across the call.
        //
        // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
        // Besides, the tail call code is part of the epilog and converting the block to
        // BBJ_THROW would result in the tail call being dropped as the epilog is generated
        // only for BBJ_RETURN blocks.
        //
        // Currently this doesn't work for non-void callees. Some of the code that handles
        // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
        // do not have this flag by default. We could add the flag here but the proper solution
        // would be to replace the return expression with a local var node during inlining
        // so the rest of the call tree stays in a separate statement. That statement can then
        // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
        //

        if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
        {
            fgRemoveRestOfBlock = true;
        }
    }

    return call;
}

//------------------------------------------------------------------------
// fgMorphConst: Transform the given GTK_CONST tree for code generation
//
// Arguments:
//    tree - The constant node to morph (asserted to be of kind GTK_CONST)
//
// Return Value:
//    For anything other than GT_CNS_STR, the incoming node with its effect
//    flags and GTF_REVERSE_OPS cleared. For GT_CNS_STR, the result of morphing
//    either a lazy string literal helper call (only considered when the current
//    block is BBJ_THROW and the EE supplies a helper) or the string literal
//    node built from what constructStringLiteral reports.

GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
{
    noway_assert(tree->OperKind() & GTK_CONST);

    // Drop any exception flags or other unnecessary flags that may have been
    // set before this node was folded to a constant.
    tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);

    // Only string constants need further work.
    if (tree->OperGet() != GT_CNS_STR)
    {
        return tree;
    }

    const auto strScopeHnd = tree->gtStrCon.gtScpHnd;
    const auto strToken    = tree->gtStrCon.gtSconCPX;

    // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
    // guarantee slow performance for that block. Instead cache the return value
    // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.

    if (compCurBB->bbJumpKind == BBJ_THROW)
    {
        const CorInfoHelpFunc lazyStrHelper = info.compCompHnd->getLazyStringLiteralHelper(strScopeHnd);
        if (lazyStrHelper != CORINFO_HELP_UNDEF)
        {
            // For un-important blocks, we want to construct the string lazily.
            // The current-module helper is passed the RID only; any other helper
            // is also passed the embedded scope handle.
            GenTreeArgList* const helperArgs =
                (lazyStrHelper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
                    ? gtNewArgList(gtNewIconNode(RidFromToken(strToken), TYP_INT))
                    : gtNewArgList(gtNewIconNode(RidFromToken(strToken), TYP_INT),
                                   gtNewIconEmbScpHndNode(strScopeHnd));

            GenTreePtr lazyStrCall = gtNewHelperCallNode(lazyStrHelper, TYP_REF, 0, helperArgs);
            return fgMorphTree(lazyStrCall);
        }
    }

    assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));

    // Ask the EE for the literal, then morph the node that represents it.
    LPVOID               literalValue;
    const InfoAccessType literalAccess =
        info.compCompHnd->constructStringLiteral(strScopeHnd, strToken, &literalValue);

    GenTreePtr literalNode = gtNewStringLiteralNode(literalAccess, literalValue);
    return fgMorphTree(literalNode);
}

//------------------------------------------------------------------------
// fgMorphLeaf: Transform the given GTK_LEAF tree for code generation
//
// Arguments:
//    tree - The leaf node to morph (asserted to be of kind GTK_LEAF)
//
// Return Value:
//    GT_LCL_VAR   - whatever fgMorphLocalVar returns.
//    GT_LCL_FLD   - (x86 varargs methods only) the tree produced by
//                   fgMorphStackArgForVarArgs, when it produces one.
//    GT_FTN_ADDR  - the morphed form of the node after it has been changed in
//                   place into a GT_CNS_INT and wrapped according to the
//                   access type of the entry point.
//    Anything else is returned unchanged.

GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
{
    noway_assert(tree->OperKind() & GTK_LEAF);

    if (tree->OperGet() == GT_LCL_VAR)
    {
        return fgMorphLocalVar(tree, /* forceRemorph */ false);
    }

#ifdef _TARGET_X86_
    if ((tree->OperGet() == GT_LCL_FLD) && info.compIsVarArgs)
    {
        GenTreePtr stackArgTree =
            fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
        if (stackArgTree != nullptr)
        {
            // A block node standing in for a local field that is not being defined
            // is handed to fgMorphBlkToInd.
            if (stackArgTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
            {
                fgMorphBlkToInd(stackArgTree->AsBlk(), stackArgTree->gtType);
            }
            return stackArgTree;
        }
    }
#endif // _TARGET_X86_

    // Function addresses are the only remaining leaves that need any work.
    if (tree->OperGet() != GT_FTN_ADDR)
    {
        return tree;
    }

    CORINFO_CONST_LOOKUP addrInfo;

#ifdef FEATURE_READYTORUN_COMPILER
    if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
    {
        // An entry point is already recorded on the node; use it.
        addrInfo = tree->gtFptrVal.gtEntryPoint;
    }
    else
#endif
    {
        info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
    }

    // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
    //
    tree->SetOper(GT_CNS_INT);
    tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
    tree->gtFlags |= GTF_ICON_FTN_ADDR;

    switch (addrInfo.accessType)
    {
        case IAT_VALUE:
            tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
            break;

        case IAT_PVALUE:
            // One indirection through the handle.
            tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
            break;

        case IAT_PPVALUE:
            // Two indirections; only the inner one is flagged GTF_IND_INVARIANT.
            tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
            tree->gtFlags |= GTF_IND_INVARIANT;
            tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
            break;

        default:
            noway_assert(!"Unknown addrInfo.accessType");
            break;
    }

    return fgMorphTree(tree);
}

//------------------------------------------------------------------------
// fgAssignSetVarDef: Flag the local var node defined by 'tree', if any, as a definition
//
// Arguments:
//    tree - The node to examine
//
// Notes:
//    Nothing is changed unless tree->DefinesLocal() reports that a local is defined.
//    A definition of the entire local is flagged with GTF_VAR_DEF only; any other
//    definition is flagged with both GTF_VAR_DEF and GTF_VAR_USEASG.

void Compiler::fgAssignSetVarDef(GenTreePtr tree)
{
    GenTreeLclVarCommon* definedLclNode;
    bool                 definesEntireLcl = false;

    if (!tree->DefinesLocal(this, &definedLclNode, &definesEntireLcl))
    {
        return;
    }

    definedLclNode->gtFlags |= GTF_VAR_DEF;

    if (!definesEntireLcl)
    {
        // We consider partial definitions to be modeled as uses followed by definitions.
        // This captures the idea that preceding defs are not necessarily made redundant
        // by this definition.
        definedLclNode->gtFlags |= GTF_VAR_USEASG;
    }
}

//------------------------------------------------------------------------
// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
//
// Arguments:
//    tree - The block assignment to be possibly morphed
//
// Return Value:
//    The modified tree if successful, nullptr otherwise.
//
// Assumptions:
//    'tree' must be a block assignment.
//
// Notes:
//    If successful, this method always returns the incoming tree, modifying only
//    its arguments.

GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
{
    // This must be a block assignment.
    noway_assert(tree->OperIsBlkOp());
    var_types asgType = tree->TypeGet();

    GenTreePtr asg         = tree;
    GenTreePtr dest        = asg->gtGetOp1();
    GenTreePtr src         = asg->gtGetOp2();
    unsigned   destVarNum  = BAD_VAR_NUM;
    LclVarDsc* destVarDsc  = nullptr;
    GenTreePtr lclVarTree  = nullptr;
    bool       isCopyBlock = asg->OperIsCopyBlkOp();
    bool       isInitBlock = !isCopyBlock;

    unsigned             size;
    CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
#ifdef FEATURE_SIMD
    // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD)
    // The SIMD type in question could be Vector2f which is 8-bytes in size.
    // The below check is to make sure that we don't turn that copyblk
    // into an assignment, since rationalizer logic will transform the
    // copyblk appropriately. Otherwise, the transformation made in this
    // routine will prevent rationalizer logic and we might end up with
    // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
    // in codegen.
    // TODO-1stClassStructs: This is here to preserve old behavior.
    // It should be eliminated.
    if (src->OperGet() == GT_SIMD)
    {
        return nullptr;
    }
#endif

    if (dest->gtEffectiveVal()->OperIsBlk())
    {
        GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
        size               = lhsBlk->Size();
        if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
        {
            destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
            destVarDsc = &(lvaTable[destVarNum]);
        }
        if (lhsBlk->OperGet() == GT_OBJ)
        {
            clsHnd = lhsBlk->AsObj()->gtClass;
        }
    }
    else
    {
        // Is this an enregisterable struct that is already a simple assignment?
        // This can happen if we are re-morphing.
        if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
        {
            return tree;
        }
        noway_assert(dest->OperIsLocal());
        lclVarTree = dest;
        destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
        destVarDsc = &(lvaTable[destVarNum]);
        if (isCopyBlock)
        {
            clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
            size   = info.compCompHnd->getClassSize(clsHnd);
        }
        else
        {
            size = destVarDsc->lvExactSize;
        }
    }

    //
    //  See if we can do a simple transformation:
    //
    //          GT_ASG <TYP_size>
    //          /   \
    //      GT_IND GT_IND or CNS_INT
    //         |      |
    //       [dest] [src]
    //

    if (size == REGSIZE_BYTES)
    {
        if (clsHnd == NO_CLASS_HANDLE)
        {
            // A register-sized cpblk can be treated as an integer assignment.
            asgType = TYP_I_IMPL;
        }
        else
        {
            BYTE gcPtr;
            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
            asgType = getJitGCType(gcPtr);
        }
    }
    else
    {
        switch (size)
        {
            case 1:
                asgType = TYP_BYTE;
                break;
            case 2:
                asgType = TYP_SHORT;
                break;

#ifdef _TARGET_64BIT_
            case 4:
                asgType = TYP_INT;
                break;
#endif // _TARGET_64BIT_
        }
    }

    // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
    if (!varTypeIsStruct(asgType))
    {
        // For initBlk, a non-constant source is not going to allow us to fiddle
        // with the bits to create a single assignment.
        noway_assert(size <= REGSIZE_BYTES);

        if (isInitBlock && !src->IsConstInitVal())
        {
            return nullptr;
        }

        if (destVarDsc != nullptr)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about dest
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destVarNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            // A previous incarnation of this code also required the local not to be
            // address-exposed(=taken).  That seems orthogonal to the decision of whether
            // to do field-wise assignments: being address-exposed will cause it to be
            // "dependently" promoted, so it will be in the right memory location.  One possible
            // further reason for avoiding field-wise stores is that the struct might have alignment-induced
            // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid
            // concern, then we could compromise, and say that address-exposed + fields do not completely cover the
            // memory of the struct prevent field-wise assignments.  Same situation exists for the "src" decision.
            if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
            {
                // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
                return nullptr;
            }
            else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
            {
                // Use the dest local var directly, as well as its type.
                dest    = lclVarTree;
                asgType = destVarDsc->lvType;

                // If the block operation had been a write to a local var of a small int type,
                // of the exact size of the small int type, and the var is NormalizeOnStore,
                // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
                // have done that normalization.  If we're now making it into an assignment,
                // the NormalizeOnStore will work, and it can be a full def.
                if (destVarDsc->lvNormalizeOnStore())
                {
                    dest->gtFlags &= (~GTF_VAR_USEASG);
                }
            }
            else
            {
                // Could be a non-promoted struct, or a floating point type local, or
                // an int subject to a partial write.  Don't enregister.
                lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));

                // Mark the local var tree as a definition point of the local.
                lclVarTree->gtFlags |= GTF_VAR_DEF;
                if (size < destVarDsc->lvExactSize)
                { // If it's not a full-width assignment....
                    lclVarTree->gtFlags |= GTF_VAR_USEASG;
                }

                if (dest == lclVarTree)
                {
                    dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
                }
            }
        }

        // Check to ensure we don't have a reducible *(& ... )
        if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
        {
            GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
            // Ignore reinterpret casts between int/gc
            if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
            {
                dest    = addrOp;
                asgType = addrOp->TypeGet();
            }
        }

        if (dest->gtEffectiveVal()->OperIsIndir())
        {
            // If we have no information about the destination, we have to assume it could
            // live anywhere (not just in the GC heap).
            // Mark the GT_IND node so that we use the correct write barrier helper in case
            // the field is a GC ref.

            if (!fgIsIndirOfAddrOfLocal(dest))
            {
                dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
                tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
            }
        }

        LclVarDsc* srcVarDsc = nullptr;
        if (isCopyBlock)
        {
            if (src->OperGet() == GT_LCL_VAR)
            {
                lclVarTree = src;
                srcVarDsc  = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
            }
            else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
            {
                srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
            }
            if (srcVarDsc != nullptr)
            {
                if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
                {
                    // Let fgMorphCopyBlock handle it.
                    return nullptr;
                }
                else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
                         size == genTypeSize(genActualType(lclVarTree->TypeGet())))
                {
                    // Use the src local var directly.
                    src = lclVarTree;
                }
                else
                {
#ifndef LEGACY_BACKEND

                    // The source argument of the copyblk can potentially
                    // be accessed only through indir(addr(lclVar))
                    // or indir(lclVarAddr) in rational form and liveness
                    // won't account for these uses. That said,
                    // we have to mark this local as address exposed so
                    // we don't delete it as a dead store later on.
                    unsigned lclVarNum                = lclVarTree->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));

#else  // LEGACY_BACKEND
                    lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
#endif // LEGACY_BACKEND
                    GenTree* srcAddr;
                    if (src == lclVarTree)
                    {
                        srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
                        src     = gtNewOperNode(GT_IND, asgType, srcAddr);
                    }
                    else
                    {
                        assert(src->OperIsIndir());
                    }
                }
            }
            // If we have no information about the src, we have to assume it could
            // live anywhere (not just in the GC heap).
            // Mark the GT_IND node so that we use the correct write barrier helper in case
            // the field is a GC ref.

            if (!fgIsIndirOfAddrOfLocal(src))
            {
                src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
            }
        }
        else
        {
// InitBlk
#if FEATURE_SIMD
            if (varTypeIsSIMD(asgType))
            {
                assert(!isCopyBlock); // Else we would have returned the tree above.
                noway_assert(src->IsIntegralConst(0));
                noway_assert(destVarDsc != nullptr);

                src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
                tree->gtOp.gtOp2 = src;
                return tree;
            }
            else
#endif
            {
                if (src->OperIsInitVal())
                {
                    src = src->gtGetOp1();
                }
                assert(src->IsCnsIntOrI());
                // This will mutate the integer constant, in place, to be the correct
                // value for the type we are using in the assignment.
                src->AsIntCon()->FixupInitBlkValue(asgType);
            }
        }

        // Ensure that the dest is setup appropriately.
        if (dest->gtEffectiveVal()->OperIsIndir())
        {
            dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
        }

        // Ensure that the rhs is setup appropriately.
        if (isCopyBlock)
        {
            src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
        }

        // Set the lhs and rhs on the assignment.
        if (dest != tree->gtOp.gtOp1)
        {
            asg->gtOp.gtOp1 = dest;
        }
        if (src != asg->gtOp.gtOp2)
        {
            asg->gtOp.gtOp2 = src;
        }

        asg->ChangeType(asgType);
        dest->gtFlags |= GTF_DONT_CSE;
        asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
        // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
        asg->gtFlags &= ~GTF_REVERSE_OPS;

#ifdef DEBUG
        if (verbose)
        {
            printf("fgMorphOneAsgBlock (after):\n");
            gtDispTree(tree);
        }
#endif
        return tree;
    }

    return nullptr;
}

//------------------------------------------------------------------------
// fgMorphInitBlock: Perform the Morphing of a block initialization
//
// Arguments:
//    tree - a GT_ASG node that is an init block operation (this is asserted on entry);
//           the child nodes for tree have already been Morphed
//
// Return Value:
//    One of the following:
//    - The single assignment produced by fgMorphOneAsgBlockOp, when it transforms the tree
//      (most desirable).
//    - A field by field initialization (assignments chained with GT_COMMA), when the destination
//      is a promoted struct local that is eligible for it.
//    - The original assignment, kept as a block initialization with its destination operand
//      canonicalized by fgMorphBlockOperand (least desirable, but always correct).
//
// Notes:
//    If we leave it as a block initialization and the destination is a local that is not marked
//    lvRegStruct, we call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
//    If the destination is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    can not use a field by field assignment and must keep the block initialization.

GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
{
    // We must have the GT_ASG form of InitBlkOp.
    noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
#ifdef DEBUG
    bool morphed = false;
#endif // DEBUG

    GenTree* asg      = tree;
    GenTree* src      = tree->gtGetOp2();
    GenTree* origDest = tree->gtGetOp1();

    // Morph the destination first; the assignment takes on the type of the morphed destination.
    GenTree* dest = fgMorphBlkNode(origDest, true);
    if (dest != origDest)
    {
        tree->gtOp.gtOp1 = dest;
    }
    tree->gtType = dest->TypeGet();
    // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
    // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
    if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
    {
        src->gtType = TYP_INT;
    }
    JITDUMP("\nfgMorphInitBlock:");

    GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        GenTree*             destAddr          = nullptr;
        GenTree*             initVal           = src->OperIsInitVal() ? src->gtGetOp1() : src;
        GenTree*             blockSize         = nullptr;
        unsigned             blockWidth        = 0;
        FieldSeqNode*        destFldSeq        = nullptr;
        LclVarDsc*           destLclVar        = nullptr;
        bool                 destDoFldAsg      = false;
        unsigned             destLclNum        = BAD_VAR_NUM;
        bool                 blockWidthIsConst = false;
        GenTreeLclVarCommon* lclVarTree        = nullptr;

        // Work out the destination address and width from the shape of 'dest'.
        if (dest->IsLocal())
        {
            lclVarTree = dest->AsLclVarCommon();
        }
        else
        {
            if (dest->OperIsBlk())
            {
                destAddr   = dest->AsBlk()->Addr();
                blockWidth = dest->AsBlk()->gtBlkSize;
            }
            else
            {
                assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
                destAddr   = dest->gtGetOp1();
                blockWidth = genTypeSize(dest->TypeGet());
            }
        }
        if (lclVarTree != nullptr)
        {
            // The destination is a local itself: its width is known from the local's descriptor.
            destLclNum        = lclVarTree->gtLclNum;
            destLclVar        = &lvaTable[destLclNum];
            blockWidth        = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
            blockWidthIsConst = true;
        }
        else
        {
            if (dest->gtOper == GT_DYN_BLK)
            {
                // The size must be an integer type
                blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
                assert(varTypeIsIntegral(blockSize->gtType));
            }
            else
            {
                assert(blockWidth != 0);
                blockWidthIsConst = true;
            }

            // The destination may still refer to a local through its address expression.
            if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
            {
                destLclNum = lclVarTree->gtLclNum;
                destLclVar = &lvaTable[destLclNum];
            }
        }
        if (destLclNum != BAD_VAR_NUM)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                assert(initVal->OperGet() == GT_CNS_INT);
                noway_assert(varTypeIsStruct(destLclVar));
                noway_assert(!opts.MinOpts());
                // Both flags are booleans, so use the logical operator rather than a bitwise one.
                if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
                {
                    JITDUMP(" dest is address exposed");
                }
                else
                {
                    if (blockWidth == destLclVar->lvExactSize)
                    {
                        JITDUMP(" (destDoFldAsg=true)");
                        // We may decide later that a copyblk is required when this struct has holes
                        destDoFldAsg = true;
                    }
                    else
                    {
                        JITDUMP(" with mismatched size");
                    }
                }
            }
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains holes");
            destDoFldAsg = false;
        }

        JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");

        // If we're doing an InitBlock and we've transformed the dest to a non-Blk
        // we need to change it back.
        if (!destDoFldAsg && !dest->OperIsBlk())
        {
            noway_assert(blockWidth != 0);
            tree->gtOp.gtOp1 = origDest;
            tree->gtType     = origDest->gtType;
        }

        if (!destDoFldAsg && (destLclVar != nullptr))
        {
            // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        if (!destDoFldAsg)
        {
            // We are keeping the block form: canonicalize the destination operand and
            // propagate its effect flags to the assignment.
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            dest             = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
            tree->gtOp.gtOp1 = dest;
            tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
        }
        else
        {
            // The initVal must be a constant of TYP_INT
            noway_assert(initVal->OperGet() == GT_CNS_INT);
            noway_assert(genActualType(initVal->gtType) == TYP_INT);

            // The dest must be of a struct type.
            noway_assert(varTypeIsStruct(destLclVar));

            //
            // Now, convert InitBlock to individual assignments
            //

            tree = nullptr;
            INDEBUG(morphed = true);

            unsigned fieldCnt = destLclVar->lvFieldCnt;

            for (unsigned i = 0; i < fieldCnt; ++i)
            {
                // Build "fieldLcl = <init pattern>" for the i-th promoted field.
                unsigned   fieldLclNum = destLclVar->lvFieldLclStart + i;
                GenTreePtr fieldDest   = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                fieldDest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));

                GenTreePtr srcCopy = gtCloneExpr(initVal);
                noway_assert(srcCopy != nullptr);

                // need type of oper to be same as tree
                if (fieldDest->gtType == TYP_LONG)
                {
                    srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
                    // copy and extend the value
                    // NOTE(review): unlike the memset-based cases below, this does not replicate the
                    // init byte across the whole value; that is equivalent for a zero init value --
                    // confirm whether a non-zero init value can reach this path.
                    srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
                    /* Change the types of srcCopy to TYP_LONG */
                    srcCopy->gtType = TYP_LONG;
                }
                else if (varTypeIsFloating(fieldDest->gtType))
                {
                    srcCopy->ChangeOperConst(GT_CNS_DBL);
                    // setup the bit pattern
                    memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
                           sizeof(srcCopy->gtDblCon.gtDconVal));
                    /* Change the types of srcCopy to TYP_DOUBLE */
                    srcCopy->gtType = TYP_DOUBLE;
                }
                else
                {
                    noway_assert(srcCopy->gtOper == GT_CNS_INT);
                    noway_assert(srcCopy->TypeGet() == TYP_INT);
                    // setup the bit pattern
                    memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
                           sizeof(srcCopy->gtIntCon.gtIconVal));
                }

                // The constant finally takes on the exact type of the field being initialized.
                srcCopy->gtType = fieldDest->TypeGet();

                asg = gtNewAssignNode(fieldDest, srcCopy);

#if LOCAL_ASSERTION_PROP
                if (optLocalAssertionProp)
                {
                    optAssertionGen(asg);
                }
#endif // LOCAL_ASSERTION_PROP

                // Chain the per-field assignments together with GT_COMMA nodes.
                if (tree)
                {
                    tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
                }
                else
                {
                    tree = asg;
                }
            }
        }
    }

#ifdef DEBUG
    if (morphed)
    {
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;

        if (verbose)
        {
            printf("fgMorphInitBlock (after):\n");
            gtDispTree(tree);
        }
    }
#endif

    return tree;
}

//------------------------------------------------------------------------
// fgMorphBlkToInd: Rewrite a blk node so that it becomes a GT_IND of the given type
//
// Arguments:
//    tree - the block node to rewrite.
//    type - the type the resulting indirection should have.
//
// Return Value:
//    The node that was passed in; it is rewritten in place rather than replaced.
//
// Notes:
//    The method is tiny; it exists so that the "these nodes can be modified in-place"
//    detail lives in one spot instead of being repeated by the callers.

GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
{
    GenTreeBlk* const blkNode = tree;

    // Switch the operator first, then stamp the requested type on the same node.
    blkNode->SetOper(GT_IND);
    blkNode->gtType = type;

    return blkNode;
}

//------------------------------------------------------------------------
// fgMorphGetStructAddr: Gets the address of a struct object
//
// Arguments:
//    pTree    - the parent's pointer to the struct object node; on return it is
//               updated to point at the address node that is returned
//    clsHnd   - the class handle for the struct type
//    isRValue - true if this is a source (not dest)
//
// Return Value:
//    Returns the address of the struct value, possibly modifying the existing tree to
//    sink the address below any comma nodes (this is to canonicalize for value numbering).
//    If this is a source, it will morph it to a GT_IND before taking its address,
//    since it may not be remorphed (and we don't want blk nodes as rvalues).

GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
{
    GenTree* addr;
    GenTree* tree = *pTree;
    // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
    // need to hang onto that for the purposes of value numbering.
    if (tree->OperIsIndir())
    {
        if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
        {
            addr = tree->gtOp.gtOp1;
        }
        else
        {
            // Keep the indirection and take its address; a source blk node is first turned into a GT_IND.
            if (isRValue && tree->OperIsBlk())
            {
                tree->ChangeOper(GT_IND);
            }
            addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
        }
    }
    else if (tree->gtOper == GT_COMMA)
    {
        // If this is a comma, we're going to "sink" the GT_ADDR below it.
        // The recursive call replaces op2 with its address, so the comma itself becomes the byref.
        (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
        tree->gtType = TYP_BYREF;
        addr         = tree;
    }
    else
    {
        switch (tree->gtOper)
        {
            case GT_LCL_FLD:
            case GT_LCL_VAR:
            case GT_INDEX:
            case GT_FIELD:
            case GT_ARR_ELEM:
                // These nodes can have their address taken directly.
                addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
                break;
            default:
            {
                // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
                // not going to use the node returned by fgInsertCommaFormTemp.
                // Only the update made through 'pTree' matters here (presumably *pTree becomes a
                // comma-form temp -- confirm against fgInsertCommaFormTemp), so the result is
                // discarded and the rewritten *pTree is processed by the recursive call.
                (void)fgInsertCommaFormTemp(pTree, clsHnd);
                addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
                break;
            }
        }
    }
    *pTree = addr;
    return addr;
}

//------------------------------------------------------------------------
// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
//
// Arguments:
//    tree   - The struct type node
//    isDest - True if this is the destination of the assignment
//
// Return Value:
//    Returns the possibly-morphed node. The caller is responsible for updating
//    the parent of this node.
//
// Notes:
//    - A GT_COMMA operand is rewritten so that the commas produce the address (TYP_BYREF)
//      and the result is wrapped in a new IND, OBJ or BLK node.
//    - A GT_DYN_BLK whose size is a non-zero integer constant is changed to a GT_BLK.
//    - A non-struct-typed block over the address of a GT_LCL_VAR may cause that local
//      to be marked as do-not-enregister.

GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
{
    if (tree->gtOper == GT_COMMA)
    {
        GenTree* effectiveVal = tree->gtEffectiveVal();
        GenTree* addr         = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
#ifdef DEBUG
        addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
        // In order to CSE and value number array index expressions and bounds checks,
        // the commas in which they are contained need to match.
        // The pattern is that the COMMA should be the address expression.
        // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
        // TODO-1stClassStructs: Consider whether this can be improved.
        // Also consider whether some of this can be included in gtNewBlockVal (though note
        // that doing so may cause us to query the type system before we otherwise would).
        GenTree* lastComma = nullptr;
        for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
        {
            // Every comma on the op2 chain now yields the address.
            next->gtType = TYP_BYREF;
            lastComma    = next;
        }
        if (lastComma != nullptr)
        {
            // Splice the GT_ADDR in as the value of the innermost comma; the outermost comma
            // then serves as the address expression.
            noway_assert(lastComma->gtGetOp2() == effectiveVal);
            lastComma->gtOp.gtOp2 = addr;
            addr                  = tree;
        }
        var_types structType = effectiveVal->TypeGet();
        if (structType == TYP_STRUCT)
        {
            CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
            if (structHnd == NO_CLASS_HANDLE)
            {
                tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
            }
            else
            {
                tree = gtNewObjNode(structHnd, addr);
                if (tree->OperGet() == GT_OBJ)
                {
                    gtSetObjGcInfo(tree->AsObj());
                }
            }
        }
        else
        {
            tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
        }
#ifdef DEBUG
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
    }

    if (!tree->OperIsBlk())
    {
        return tree;
    }
    GenTreeBlk* blkNode = tree->AsBlk();
    if (blkNode->OperGet() == GT_DYN_BLK)
    {
        if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
        {
            unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
            // A GT_BLK with size of zero is not supported,
            // so if we encounter such a thing we just leave it as a GT_DYN_BLK
            if (size != 0)
            {
                blkNode->AsDynBlk()->gtDynamicSize = nullptr;
                blkNode->ChangeOper(GT_BLK);
                blkNode->gtBlkSize = size;
            }
            else
            {
                return tree;
            }
        }
        else
        {
            return tree;
        }
    }
    if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
        (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
    {
        GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
        // The local is marked do-not-enregister when the block and the local differ in size,
        // or when a non-struct local is used as the source.
        if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
        {
            // Use DEBUGARG, like the other lvaSetVarDoNotEnregister call sites in this file.
            lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUGARG(DNER_VMNeedsStackAddr));
        }
    }

    return tree;
}

//------------------------------------------------------------------------
// fgMorphBlockOperand: Canonicalize an operand of a block assignment
//
// Arguments:
//    tree       - The block operand
//    asgType    - The type of the assignment
//    blockWidth - The size of the block
//    isDest     - true iff this is the destination of the assignment
//
// Return Value:
//    Returns the morphed block operand
//
// Notes:
//    The work is done on the effective value of 'tree' (tree->gtEffectiveVal()), and
//    that (possibly replaced) node is what gets returned.
//    - For a non-struct 'asgType' the operand is made to have type 'asgType', either by
//      using the node under IND(ADDR(x)), by retyping an indirection, or by wrapping the
//      node in IND(ADDR(x)).
//    - For a struct 'asgType' the operand ends up as a block/indirection node or
//      (for non-LEGACY_BACKEND) a lclVar, or for SIMD types possibly a SIMD node.
//    Although 'tree' WAS an operand of a block assignment, the assignment
//    may have been retyped to be a scalar assignment.

GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
{
    GenTree* operand = tree->gtEffectiveVal();

    // Scalar assignment: all that is required is an operand of type 'asgType'.
    if (!varTypeIsStruct(asgType))
    {
        if (operand->OperIsIndir())
        {
            GenTree* const indirAddr = operand->AsIndir()->Addr();
            const bool     addrOfSameType =
                (indirAddr->OperGet() == GT_ADDR) && (indirAddr->gtGetOp1()->TypeGet() == asgType);

            if (addrOfSameType)
            {
                // IND(ADDR(x)) where x already has the right type: use x itself.
                operand = indirAddr->gtGetOp1();
            }
            else if (operand->OperIsBlk())
            {
                operand = fgMorphBlkToInd(operand->AsBlk(), asgType);
            }
            else
            {
                operand->gtType = asgType;
            }
        }
        else if (operand->TypeGet() != asgType)
        {
            // Reinterpret the value through IND(ADDR(operand)).
            operand = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, operand));
        }
        return operand;
    }

    // Struct assignment.
    GenTreeIndir*        indir    = nullptr;
    GenTreeLclVarCommon* lclVar   = nullptr;
    bool                 mustWrap = true;

    // Find the indirection and/or the GT_LCL_VAR that the operand refers to, if any.
    if (operand->OperIsIndir())
    {
        indir                    = operand->AsIndir();
        GenTree* const indirAddr = indir->Addr();
        if ((indirAddr->OperGet() == GT_ADDR) && (indirAddr->gtGetOp1()->OperGet() == GT_LCL_VAR))
        {
            lclVar = indirAddr->gtGetOp1()->AsLclVarCommon();
        }
    }
    else if (operand->OperGet() == GT_LCL_VAR)
    {
        lclVar = operand->AsLclVarCommon();
    }

#ifdef FEATURE_SIMD
    if (varTypeIsSIMD(asgType))
    {
        // IND(ADDR(SIMD)) collapses to the SIMD node, and a SIMD node is used as-is.
        if ((indir != nullptr) && (lclVar == nullptr) && (indir->Addr()->OperGet() == GT_ADDR) &&
            (indir->Addr()->gtGetOp1()->gtOper == GT_SIMD))
        {
            assert(!isDest);
            mustWrap = false;
            operand  = indir->Addr()->gtGetOp1();
        }
        if (operand->OperIsSIMD())
        {
            mustWrap = false;
        }
    }
#endif // FEATURE_SIMD

    if (lclVar != nullptr)
    {
        LclVarDsc* const varDsc           = &(lvaTable[lclVar->gtLclNum]);
        const bool       wholeStructLocal = varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth);

        if (!wholeStructLocal)
        {
            // This may be a lclVar that was determined to be address-exposed.
            operand->gtFlags |= (lclVar->gtFlags & GTF_ALL_EFFECT);
        }
#ifndef LEGACY_BACKEND
        else
        {
            // The block covers exactly the struct local: use the lclVar node directly.
            operand  = lclVar;
            mustWrap = false;
        }
#endif // !LEGACY_BACKEND
    }

    if (!mustWrap)
    {
        return operand;
    }

    if (indir != nullptr)
    {
        // We should never find a struct indirection on the lhs of an assignment.
        assert(!isDest || indir->OperIsBlk());
        if (!isDest && indir->OperIsBlk())
        {
            // A source block node is turned into a plain indirection, in place.
            (void)fgMorphBlkToInd(operand->AsBlk(), asgType);
        }
        return operand;
    }

    // No indirection yet: build one over the address of the operand.
    GenTree* const operandAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, operand);

    if (!isDest)
    {
        return new (this, GT_IND) GenTreeIndir(GT_IND, asgType, operandAddr, nullptr);
    }

    CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(operand);
    if (clsHnd == NO_CLASS_HANDLE)
    {
        return new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, operandAddr, blockWidth);
    }

    GenTree* const objNode = gtNewObjNode(clsHnd, operandAddr);
    if (objNode->OperGet() == GT_OBJ)
    {
        gtSetObjGcInfo(objNode->AsObj());
    }
    if (operand->IsLocal() && ((operand->gtFlags & GTF_GLOB_EFFECT) == 0))
    {
        // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
        // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
        // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
        // separately now to avoid excess diffs.
        objNode->gtFlags &= ~(GTF_GLOB_EFFECT);
    }
    return objNode;
}

//------------------------------------------------------------------------
// fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
//
// Arguments:
//    dest - the GT_OBJ or GT_STORE_OBJ
//
// Assumptions:
//    The destination must be known (by the caller) to be on the stack.
//    The node must contain at least one GC pointer (gtGcPtrCount != 0).
//
// Notes:
//    If we have a CopyObj with a dest on the stack, and its size is small enough
//    to be completely unrolled (i.e. between 2 * TARGET_POINTER_SIZE and
//    CPBLK_UNROLL_LIMIT bytes, inclusive), we will convert it into a
//    GC Unsafe CopyBlk that is non-interruptible.
//    This is not supported for the JIT32_GCENCODER (or when CPBLK_UNROLL_LIMIT is not
//    defined), in which case this method is a no-op.
//
void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
{
#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
    assert(dest->gtGcPtrCount != 0);
    unsigned blockWidth = dest->AsBlk()->gtBlkSize;
#ifdef DEBUG
    // Check the caller's claim that the destination is a local address expression.
    GenTree* destAddr = dest->Addr();
    assert(destAddr->IsLocalAddrExpr() != nullptr);
#endif
    if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
    {
        // Drop the "obj" form in favor of the matching "blk" form.
        genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
        dest->SetOper(newOper);
        dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
    }
#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
}

//------------------------------------------------------------------------
// fgMorphCopyBlock: Perform the Morphing of block copy
//
// Arguments:
//    tree - a block copy (i.e. an assignment with a block op on the lhs).
//
// Return Value:
//    We can return the original block copy unmodified (least desirable, but always correct)
//    We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
//    If we have performed struct promotion of the Source() or the Dest() then we will try to
//    perform a field by field assignment for each of the promoted struct fields.
//
// Assumptions:
//    The child nodes for tree have already been Morphed.
//
// Notes:
//    If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
//    When performing a field by field assignment we can have one of Source() or Dest treated as a blob of bytes
//    and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
//    If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
//    can not use a field by field assignment and must leave the original block copy unmodified.

GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
{
    noway_assert(tree->OperIsCopyBlkOp());

    JITDUMP("\nfgMorphCopyBlock:");

    bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;

    GenTree* asg  = tree;
    GenTree* rhs  = asg->gtGetOp2();
    GenTree* dest = asg->gtGetOp1();

#if FEATURE_MULTIREG_RET
    // If this is a multi-reg return, we will not do any morphing of this node.
    if (rhs->IsMultiRegCall())
    {
        assert(dest->OperGet() == GT_LCL_VAR);
        JITDUMP(" not morphing a multireg call return\n");
        return tree;
    }
#endif // FEATURE_MULTIREG_RET

    // If we have an array index on the lhs, we need to create an obj node.

    dest = fgMorphBlkNode(dest, true);
    if (dest != asg->gtGetOp1())
    {
        asg->gtOp.gtOp1 = dest;
        if (dest->IsLocal())
        {
            dest->gtFlags |= GTF_VAR_DEF;
        }
    }
    asg->gtType = dest->TypeGet();
    rhs         = fgMorphBlkNode(rhs, false);

    asg->gtOp.gtOp2 = rhs;

    GenTreePtr oldTree    = tree;
    GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);

    if (oneAsgTree)
    {
        JITDUMP(" using oneAsgTree.\n");
        tree = oneAsgTree;
    }
    else
    {
        unsigned             blockWidth;
        bool                 blockWidthIsConst = false;
        GenTreeLclVarCommon* lclVarTree        = nullptr;
        GenTreeLclVarCommon* srcLclVarTree     = nullptr;
        unsigned             destLclNum        = BAD_VAR_NUM;
        LclVarDsc*           destLclVar        = nullptr;
        FieldSeqNode*        destFldSeq        = nullptr;
        bool                 destDoFldAsg      = false;
        GenTreePtr           destAddr          = nullptr;
        GenTreePtr           srcAddr           = nullptr;
        bool                 destOnStack       = false;
        bool                 hasGCPtrs         = false;

        JITDUMP("block assignment to morph:\n");
        DISPTREE(asg);

        if (dest->IsLocal())
        {
            blockWidthIsConst = true;
            destOnStack       = true;
            if (dest->gtOper == GT_LCL_VAR)
            {
                lclVarTree = dest->AsLclVarCommon();
                destLclNum = lclVarTree->gtLclNum;
                destLclVar = &lvaTable[destLclNum];
                if (destLclVar->lvType == TYP_STRUCT)
                {
                    // It would be nice if lvExactSize always corresponded to the size of the struct,
                    // but it doesn't always for the temps that the importer creates when it spills side
                    // effects.
                    // TODO-Cleanup: Determine when this happens, and whether it can be changed.
                    blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
                }
                else
                {
                    blockWidth = genTypeSize(destLclVar->lvType);
                }
                hasGCPtrs = destLclVar->lvStructGcCount != 0;
            }
            else
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                assert(dest->gtOper == GT_LCL_FLD);
                blockWidth = genTypeSize(dest->TypeGet());
                destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
                destFldSeq = dest->AsLclFld()->gtFieldSeq;
            }
        }
        else
        {
            GenTree* effectiveDest = dest->gtEffectiveVal();
            if (effectiveDest->OperGet() == GT_IND)
            {
                assert(dest->TypeGet() != TYP_STRUCT);
                blockWidth        = genTypeSize(effectiveDest->TypeGet());
                blockWidthIsConst = true;
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = dest->gtGetOp1();
                }
            }
            else
            {
                assert(effectiveDest->OperIsBlk());
                GenTreeBlk* blk = effectiveDest->AsBlk();

                blockWidth        = blk->gtBlkSize;
                blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
                if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
                {
                    destAddr = blk->Addr();
                }
            }
            if (destAddr != nullptr)
            {
                noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
                if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
                {
                    destOnStack = true;
                    destLclNum  = lclVarTree->gtLclNum;
                    destLclVar  = &lvaTable[destLclNum];
                }
            }
        }

        if (destLclVar != nullptr)
        {
#if LOCAL_ASSERTION_PROP
            // Kill everything about destLclNum (and its field locals)
            if (optLocalAssertionProp)
            {
                if (optAssertionCount > 0)
                {
                    fgKillDependentAssertions(destLclNum DEBUGARG(tree));
                }
            }
#endif // LOCAL_ASSERTION_PROP

            if (destLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(destLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == destLclVar->lvExactSize)
                {
                    JITDUMP(" (destDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    destDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched dest size");
                }
            }
        }

        FieldSeqNode* srcFldSeq   = nullptr;
        unsigned      srcLclNum   = BAD_VAR_NUM;
        LclVarDsc*    srcLclVar   = nullptr;
        bool          srcDoFldAsg = false;

        if (rhs->IsLocal())
        {
            srcLclVarTree = rhs->AsLclVarCommon();
            srcLclNum     = srcLclVarTree->gtLclNum;
            if (rhs->OperGet() == GT_LCL_FLD)
            {
                srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
            }
        }
        else if (rhs->OperIsIndir())
        {
            if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
            {
                srcLclNum = srcLclVarTree->gtLclNum;
            }
            else
            {
                srcAddr = rhs->gtOp.gtOp1;
            }
        }

        if (srcLclNum != BAD_VAR_NUM)
        {
            srcLclVar = &lvaTable[srcLclNum];

            if (srcLclVar->lvPromoted && blockWidthIsConst)
            {
                noway_assert(varTypeIsStruct(srcLclVar));
                noway_assert(!opts.MinOpts());

                if (blockWidth == srcLclVar->lvExactSize)
                {
                    JITDUMP(" (srcDoFldAsg=true)");
                    // We may decide later that a copyblk is required when this struct has holes
                    srcDoFldAsg = true;
                }
                else
                {
                    JITDUMP(" with mismatched src size");
                }
            }
        }

        // Check to see if we are required to do a copy block because the struct contains holes
        // and either the src or dest is externally visible
        //
        bool requiresCopyBlock   = false;
        bool srcSingleLclVarAsg  = false;
        bool destSingleLclVarAsg = false;

        if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
        {
            // Self-assign; no effect.
            GenTree* nop = gtNewNothingNode();
            INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
            return nop;
        }

        // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
        if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
        {
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the dest?
        if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
        {
            JITDUMP(" dest contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // Can we use field by field assignment for the src?
        if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
        {
            JITDUMP(" src contains custom layout and contains holes");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

#if defined(_TARGET_ARM_)
        if ((rhs->OperIsIndir()) && (rhs->gtFlags & GTF_IND_UNALIGNED))
        {
            JITDUMP(" rhs is unaligned");
            requiresCopyBlock = true;
        }

        if (asg->gtFlags & GTF_BLK_UNALIGNED)
        {
            JITDUMP(" asg is unaligned");
            requiresCopyBlock = true;
        }
#endif // _TARGET_ARM_

        if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
        {
            requiresCopyBlock = true;
        }

        // Can't use field by field assignment if the src is a call.
        if (rhs->OperGet() == GT_CALL)
        {
            JITDUMP(" src is a call");
            // C++ style CopyBlock with holes
            requiresCopyBlock = true;
        }

        // If we passed the above checks, then we will check these two
        if (!requiresCopyBlock)
        {
            // Are both dest and src promoted structs?
            if (destDoFldAsg && srcDoFldAsg)
            {
                // Both structs should be of the same type, or each have a single field of the same type.
                // If not we will use a copy block.
                if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
                    lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
                {
                    unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
                    unsigned srcFieldNum  = lvaTable[srcLclNum].lvFieldLclStart;
                    if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
                        (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
                    {
                        requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
                        JITDUMP(" with mismatched types");
                    }
                }
            }
            // Are neither dest or src promoted structs?
            else if (!destDoFldAsg && !srcDoFldAsg)
            {
                requiresCopyBlock = true; // Leave as a CopyBlock
                JITDUMP(" with no promoted structs");
            }
            else if (destDoFldAsg)
            {
                // Match the following kinds of trees:
                //  fgMorphTree BB01, stmt 9 (before)
                //   [000052] ------------        const     int    8
                //   [000053] -A--G-------     copyBlk   void
                //   [000051] ------------           addr      byref
                //   [000050] ------------              lclVar    long   V07 loc5
                //   [000054] --------R---        <list>    void
                //   [000049] ------------           addr      byref
                //   [000048] ------------              lclVar    struct(P) V06 loc4
                //                                              long   V06.h (offs=0x00) -> V17 tmp9
                // Yields this transformation
                //  fgMorphCopyBlock (after):
                //   [000050] ------------        lclVar    long   V07 loc5
                //   [000085] -A----------     =         long
                //   [000083] D------N----        lclVar    long   V17 tmp9
                //
                if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
                    (blockWidth == genTypeSize(srcLclVar->TypeGet())))
                {
                    // Reject the following tree:
                    //  - seen on x86chk    jit\jit64\hfa\main\hfa_sf3E_r.exe
                    //
                    //  fgMorphTree BB01, stmt 6 (before)
                    //   [000038] -------------        const     int    4
                    //   [000039] -A--G--------     copyBlk   void
                    //   [000037] -------------           addr      byref
                    //   [000036] -------------              lclVar    int    V05 loc3
                    //   [000040] --------R----        <list>    void
                    //   [000035] -------------           addr      byref
                    //   [000034] -------------              lclVar    struct(P) V04 loc2
                    //                                          float  V04.f1 (offs=0x00) -> V13 tmp6
                    // As this would transform into
                    //   float V13 = int V05
                    //
                    unsigned  fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
                    var_types destType    = lvaTable[fieldLclNum].TypeGet();
                    if (srcLclVar->TypeGet() == destType)
                    {
                        srcSingleLclVarAsg = true;
                    }
                }
            }
            else
            {
                assert(srcDoFldAsg);
                // Check for the symmetric case (which happens for the _pointer field of promoted spans):
                //
                //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
                //                                                  /--*    byref  V18._value (offs=0x00) -> V30 tmp21
                //               [000245] -A------R---             *  =         struct (copy)
                //               [000244] -----+------             \--*  obj(8)    struct
                //               [000243] -----+------                \--*  addr      byref
                //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
                //
                if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
                    (blockWidth == genTypeSize(destLclVar->TypeGet())))
                {
                    // Check for type agreement
                    unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
                    var_types srcType     = lvaTable[fieldLclNum].TypeGet();
                    if (destLclVar->TypeGet() == srcType)
                    {
                        destSingleLclVarAsg = true;
                    }
                }
            }
        }

        // If we require a copy block then set both of the field assign bools to false
        if (requiresCopyBlock)
        {
            // If a copy block is required then we won't do field by field assignments
            destDoFldAsg = false;
            srcDoFldAsg  = false;
        }

        JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");

        // Mark the dest/src structs as DoNotEnreg
        // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
        // or the struct is not promoted
        //
        if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
        {
            if (!destLclVar->lvRegStruct)
            {
                // Mark it as DoNotEnregister.
                lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
        {
            if (!srcLclVar->lvRegStruct)
            {
                lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
            }
        }

        if (requiresCopyBlock)
        {
#if CPU_USES_BLOCK_MOVE
            compBlkOpUsed = true;
#endif
            var_types asgType = dest->TypeGet();
            dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
            asg->gtOp.gtOp1   = dest;
            asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);

            // Note that the unrolling of CopyBlk is only implemented on some platforms.
            // Currently that includes x64 and ARM but not x86: the code generation for this
            // construct requires the ability to mark certain regions of the generated code
            // as non-interruptible, and the GC encoding for the latter platform does not
            // have this capability.

            // If we have a CopyObj with a dest on the stack
            // we will convert it into an GC Unsafe CopyBlk that is non-interruptible
            // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
            // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
            //
            if (destOnStack && (dest->OperGet() == GT_OBJ))
            {
                fgMorphUnsafeBlk(dest->AsObj());
            }

            // Eliminate the "OBJ or BLK" node on the rhs.
            rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
            asg->gtOp.gtOp2 = rhs;

#ifdef LEGACY_BACKEND
            if (!rhs->OperIsIndir())
            {
                noway_assert(rhs->gtOper == GT_LCL_VAR);
                GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
                rhs              = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
            }
#endif // LEGACY_BACKEND
            // Formerly, liveness did not consider copyblk arguments of simple types as being
            // a use or def, so these variables were marked as address-exposed.
            // TODO-1stClassStructs: This should no longer be needed.
            if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
            {
                JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
                lvaTable[srcLclNum].lvAddrExposed = true;
            }

            if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
            {
                JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
                lvaTable[destLclNum].lvAddrExposed = true;
            }

            goto _Done;
        }

        //
        // Otherwise we convert this CopyBlock into individual field by field assignments
        //
        tree = nullptr;

        GenTreePtr src;
        GenTreePtr addrSpill            = nullptr;
        unsigned   addrSpillTemp        = BAD_VAR_NUM;
        bool       addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame

        unsigned fieldCnt = DUMMY_INIT(0);

        if (destDoFldAsg && srcDoFldAsg)
        {
            // To do fieldwise assignments for both sides, they'd better be the same struct type!
            // All of these conditions were checked above...
            assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
            assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);

            fieldCnt = destLclVar->lvFieldCnt;
            goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
                                // assignments.
        }
        else if (destDoFldAsg)
        {
            fieldCnt = destLclVar->lvFieldCnt;
            rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
            if (srcAddr == nullptr)
            {
                srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
            }
        }
        else
        {
            assert(srcDoFldAsg);
            fieldCnt = srcLclVar->lvFieldCnt;
            dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
            if (dest->OperIsBlk())
            {
                (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
            }
            destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
        }

        if (destDoFldAsg)
        {
            noway_assert(!srcDoFldAsg);
            if (gtClone(srcAddr))
            {
                // srcAddr is simple expression. No need to spill.
                noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // srcAddr is complex expression. Clone and spill it (unless the destination is
                // a struct local that only has one field, in which case we'd only use the
                // address value once...)
                if (destLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
                    noway_assert(addrSpill != nullptr);
                }
            }
        }

        if (srcDoFldAsg)
        {
            noway_assert(!destDoFldAsg);

            // If we're doing field-wise stores, to an address within a local, and we copy
            // the address into "addrSpill", do *not* declare the original local var node in the
            // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
            // field-wise assignments as an "indirect" assignment to the local.
            // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
            // we clone it.)
            if (lclVarTree != nullptr)
            {
                lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
            }

            if (gtClone(destAddr))
            {
                // destAddr is simple expression. No need to spill
                noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
            }
            else
            {
                // destAddr is complex expression. Clone and spill it (unless
                // the source is a struct local that only has one field, in which case we'd only
                // use the address value once...)
                if (srcLclVar->lvFieldCnt > 1)
                {
                    addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
                    noway_assert(addrSpill != nullptr);
                }

                // TODO-CQ: this should be based on a more general
                // "BaseAddress" method, that handles fields of structs, before or after
                // morphing.
                if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
                {
                    if (addrSpill->gtOp.gtOp1->IsLocal())
                    {
                        // We will *not* consider this to define the local, but rather have each individual field assign
                        // be a definition.
                        addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
                        assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
                               PROMOTION_TYPE_INDEPENDENT);
                        addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
                                                     // local stack frame
                    }
                }
            }
        }

        if (addrSpill != nullptr)
        {
            // Spill the (complex) address to a BYREF temp.
            // Note, at most one address may need to be spilled.
            addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));

            lvaTable[addrSpillTemp].lvType = TYP_BYREF;

            if (addrSpillIsStackDest)
            {
                lvaTable[addrSpillTemp].lvStackByref = true;
            }

            tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);

#ifndef LEGACY_BACKEND
            // If we are assigning the address of a LclVar here
            // liveness does not account for this kind of address taken use.
            //
            // We have to mark this local as address exposed so
            // that we don't delete the definition for this LclVar
            // as a dead store later on.
            //
            if (addrSpill->OperGet() == GT_ADDR)
            {
                GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
                if (addrOp->IsLocal())
                {
                    unsigned lclVarNum                = addrOp->gtLclVarCommon.gtLclNum;
                    lvaTable[lclVarNum].lvAddrExposed = true;
                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
                }
            }
#endif // !LEGACY_BACKEND
        }

    _AssignFields:

        for (unsigned i = 0; i < fieldCnt; ++i)
        {
            FieldSeqNode* curFieldSeq = nullptr;
            if (destDoFldAsg)
            {
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
                dest                 = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
                // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
                if (destAddr != nullptr)
                {
                    noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
                    dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                else
                {
                    noway_assert(lclVarTree != nullptr);
                    dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
                }
                // Don't CSE the lhs of an assignment.
                dest->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(srcDoFldAsg);
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;

                if (destSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(destLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        dest = gtCloneExpr(destAddr);
                        noway_assert(dest != nullptr);

                        // Is the address of a local?
                        GenTreeLclVarCommon* lclVarTree = nullptr;
                        bool                 isEntire   = false;
                        bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
                        if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
                        {
                            lclVarTree->gtFlags |= GTF_VAR_DEF;
                            if (!isEntire)
                            {
                                lclVarTree->gtFlags |= GTF_VAR_USEASG;
                            }
                        }
                    }

                    GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
                    // Have to set the field sequence -- which means we need the field handle.
                    CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq                          = GetFieldSeqStore()->CreateSingleton(fieldHnd);
                    fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;

                    dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);

                    dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);

                    // !!! The destination could be on stack. !!!
                    // This flag will let us choose the correct write barrier.
                    dest->gtFlags |= GTF_IND_TGTANYWHERE;
                }
            }

            if (srcDoFldAsg)
            {
                noway_assert(srcLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
                src                  = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());

                noway_assert(srcLclVarTree != nullptr);
                src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
                // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
                // but they are when they are under a GT_ADDR.
                src->gtFlags |= GTF_DONT_CSE;
            }
            else
            {
                noway_assert(destDoFldAsg);
                noway_assert(destLclNum != BAD_VAR_NUM);
                unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;

                if (srcSingleLclVarAsg)
                {
                    noway_assert(fieldCnt == 1);
                    noway_assert(srcLclVar != nullptr);
                    noway_assert(addrSpill == nullptr);

                    src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
                }
                else
                {
                    if (addrSpill)
                    {
                        assert(addrSpillTemp != BAD_VAR_NUM);
                        src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
                    }
                    else
                    {
                        src = gtCloneExpr(srcAddr);
                        noway_assert(src != nullptr);
                    }

                    CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
                    CORINFO_FIELD_HANDLE fieldHnd =
                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
                    curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);

                    src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
                                        new (this, GT_CNS_INT)
                                            GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));

                    src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
                }
            }

            noway_assert(dest->TypeGet() == src->TypeGet());

            asg = gtNewAssignNode(dest, src);

            // If we spilled the address, and we didn't do individual field assignments to promoted fields,
            // and it was of a local, record the assignment as an indirect update of a local.
            if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
            {
                curFieldSeq   = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
                bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
                IndirectAssignmentAnnotation* pIndirAnnot =
                    new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
                GetIndirAssignMap()->Set(asg, pIndirAnnot);
            }

#if LOCAL_ASSERTION_PROP
            if (optLocalAssertionProp)
            {
                optAssertionGen(asg);
            }
#endif // LOCAL_ASSERTION_PROP

            if (tree)
            {
                tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
            }
            else
            {
                tree = asg;
            }
        }
    }

    if (isLateArg)
    {
        tree->gtFlags |= GTF_LATE_ARG;
    }

#ifdef DEBUG
    if (tree != oldTree)
    {
        tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
    }

    if (verbose)
    {
        printf("\nfgMorphCopyBlock (after):\n");
        gtDispTree(tree);
    }
#endif

_Done:
    return tree;
}

//------------------------------------------------------------------------
// fgMorphForRegisterFP:
//   Insert conversions and normalize the tree so that it is amenable to
//   register FP architectures.
//
// Arguments:
//       tree - the node whose operands are to be normalized.
//
// return value:
//       The same 'tree' node that was passed in. Only its operands may have been
//       replaced by newly created cast nodes.
//
// Notes:
//       - For an arithmetic node of floating type, each operand whose type differs
//         from the node's type is wrapped in a cast to the node's type.
//       - For a compare node whose first operand is floating and whose operand types
//         differ, the TYP_FLOAT operand is wrapped in a cast to TYP_DOUBLE.
//       - Any other node is returned untouched.
//
GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
{
    if (tree->OperIsArithmetic())
    {
        // Only floating point arithmetic needs its operands normalized.
        if (!varTypeIsFloating(tree))
        {
            return tree;
        }

        // Fetch both operands up front, before either one is replaced.
        GenTreePtr firstOperand  = tree->gtOp.gtOp1;
        GenTreePtr secondOperand = tree->gtGetOp2();

        // The node's own type is not changed below, so it can be read once.
        const var_types resultType = tree->TypeGet();

        if (firstOperand->TypeGet() != resultType)
        {
            tree->gtOp.gtOp1 = gtNewCastNode(resultType, firstOperand, resultType);
        }

        if (secondOperand->TypeGet() != resultType)
        {
            tree->gtOp.gtOp2 = gtNewCastNode(resultType, secondOperand, resultType);
        }

        return tree;
    }

    if (!tree->OperIsCompare())
    {
        return tree;
    }

    GenTreePtr firstOperand = tree->gtOp.gtOp1;

    // Compares of non-floating operands are left alone.
    if (!varTypeIsFloating(firstOperand))
    {
        return tree;
    }

    GenTreePtr secondOperand = tree->gtGetOp2();
    assert(varTypeIsFloating(secondOperand));

    // Nothing to do when both sides already agree on the type.
    if (firstOperand->TypeGet() == secondOperand->TypeGet())
    {
        return tree;
    }

    // Both had better be floating, with just one bigger than the other:
    // widen the TYP_FLOAT side to TYP_DOUBLE.
    if (firstOperand->TypeGet() == TYP_FLOAT)
    {
        assert(secondOperand->TypeGet() == TYP_DOUBLE);
        tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, firstOperand, TYP_DOUBLE);
    }
    else if (secondOperand->TypeGet() == TYP_FLOAT)
    {
        assert(firstOperand->TypeGet() == TYP_DOUBLE);
        tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, secondOperand, TYP_DOUBLE);
    }

    return tree;
}

//------------------------------------------------------------------------
// fgMorphRecognizeBoxNullable:
//   Recognize a compare of a CORINFO_HELP_BOX_NULLABLE helper call against an
//   integral zero constant, and replace the helper call operand with a
//   TYP_BOOL indirection off the helper's second argument.
//
// Arguments:
//       compare - the compare node to examine; its operands are read through gtOp.
//
// return value:
//       Always 'compare'. When the pattern matches, the operand that held the
//       helper call has been replaced in place; otherwise the node is unchanged.
//
GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
{
    // The shape being looked for is:
    //
    // stmtExpr  void  (IL 0x000...  ???)
    //     return    int
    //             const     ref    null
    //         ==        int
    //             call help ref    HELPER.CORINFO_HELP_BOX_NULLABLE
    //                 const(h)  long   0x7fed96836c8 class
    //                 addr      byref
    //                     ld.lclVar struct V00 arg0
    //
    // which is produced for code such as the following (reported by a customer as being slow):
    //
    // private static bool IsNull<T>(T arg)
    // {
    //    return arg==null;
    // }
    //

    GenTree* const firstOperand  = compare->gtOp.gtOp1;
    GenTree* const secondOperand = compare->gtOp.gtOp2;

    GenTree*     constOperand;
    GenTreeCall* helperCall;
    GenTree**    helperCallUse; // the operand slot of 'compare' that holds the helper call

    // The constant and the helper call may appear on either side of the compare.
    if (firstOperand->IsCnsIntOrI() && secondOperand->IsHelperCall())
    {
        constOperand  = firstOperand;
        helperCall    = secondOperand->AsCall();
        helperCallUse = &compare->gtOp.gtOp2;
    }
    else if (firstOperand->IsHelperCall() && secondOperand->IsCnsIntOrI())
    {
        constOperand  = secondOperand;
        helperCall    = firstOperand->AsCall();
        helperCallUse = &compare->gtOp.gtOp1;
    }
    else
    {
        return compare;
    }

    // Only a compare against zero of the box-nullable helper is of interest.
    // The helper number is queried only once the constant is known to be zero.
    if (!constOperand->IsIntegralConst(0) ||
        (eeGetHelperNum(helperCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE))
    {
        return compare;
    }

    // Step past the first entry of the helper's argument list to reach the second
    // argument (the 'addr' node in the pattern above).
    GenTree* const remainingArgs = helperCall->gtCall.gtCallArgs->gtOp.gtOp2;
    GenTree* const nullableAddr  = remainingArgs->gtOp.gtOp1;

    // Replace the box with an access of the nullable 'hasValue' field, which is at the zero offset.
    *helperCallUse = gtNewOperNode(GT_IND, TYP_BOOL, nullableAddr);

    return compare;
}

#ifdef FEATURE_SIMD

//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
//   Checks whether the field belongs to a simd struct. If it does, returns the GenTreePtr for
//   the struct node, along with the base type, field index and simd size. If it does not, returns nullptr.
//   Usually, if the tree node is from a simd lclvar which is not used in any SIMD intrinsic, then we
//   should return nullptr, since in this case we should treat the SIMD struct as a regular struct.
//   However, if you want to get the simd struct node regardless, you can set ignoreUsedInSIMDIntrinsic
//   to true. Then there will be no IsUsedInSIMDIntrinsic check, and the SIMD struct node is returned
//   whenever the struct is a SIMD struct.
//
// Arguments:
//       tree - GenTreePtr. This node will be checked to see whether it is a field which belongs to a simd
//               struct used for a simd intrinsic.
//       pBaseTypeOut - var_types pointer, if the tree node is the tree we want, we set *pBaseTypeOut
//                      to simd lclvar's base type.
//       indexOut - unsigned pointer, if the tree is used for simd intrinsic, we will set *indexOut
//                  equals to the index number of this field.
//       simdSizeOut - unsigned pointer, if the tree is used for simd intrinsic, set the *simdSizeOut
//                     equals to the simd struct size which this tree belongs to.
//      ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
//                                  the UsedInSIMDIntrinsic check.
//
// return value:
//       A GenTreePtr which points the simd lclvar tree belongs to. If the tree is not the simd
//       instrinic related field, return nullptr.
//

GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
                                            var_types* pBaseTypeOut,
                                            unsigned*  indexOut,
                                            unsigned*  simdSizeOut,
                                            bool       ignoreUsedInSIMDIntrinsic /*false*/)
{
    GenTreePtr ret = nullptr;
    if (tree->OperGet() == GT_FIELD)
    {
        GenTreePtr objRef = tree->gtField.gtFldObj;
        if (objRef != nullptr)
        {
            GenTreePtr obj = nullptr;
            if (objRef->gtOper == GT_ADDR)
            {
                obj = objRef->gtOp.gtOp1;
            }
            else if (ignoreUsedInSIMDIntrinsic)
            {
                obj = objRef;
            }
            else
            {
                return nullptr;
            }

            if (isSIMDTypeLocal(obj))
            {
                unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = &lvaTable[lclNum];
                if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
                {
                    *simdSizeOut  = varDsc->lvExactSize;
                    *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
                    ret           = obj;
                }
            }
            else if (obj->OperGet() == GT_SIMD)
            {
                ret                   = obj;
                GenTreeSIMD* simdNode = obj->AsSIMD();
                *simdSizeOut          = simdNode->gtSIMDSize;
                *pBaseTypeOut         = simdNode->gtSIMDBaseType;
            }
        }
    }
    if (ret != nullptr)
    {
        unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
        *indexOut             = tree->gtField.gtFldOffset / BaseTypeSize;
    }
    return ret;
}

//------------------------------------------------------------------------
// fgMorphFieldToSIMDIntrinsicGet:
//   If 'tree' reads a field of a SIMD struct (as determined by getSIMDStructFromField),
//   replace the read with a SIMDIntrinsicGetItem node; otherwise leave it alone.
//
// Arguments:
//    tree - The node to examine.
//
// Return Value:
//    A new SIMDIntrinsicGetItem node when getSIMDStructFromField finds a SIMD struct
//    for 'tree'; otherwise 'tree' itself, unchanged.
//

GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
{
    unsigned  index    = 0;
    var_types baseType = TYP_UNKNOWN;
    unsigned  simdSize = 0;

    GenTreePtr simdStruct = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
    if (simdStruct == nullptr)
    {
        // No SIMD struct was found for this node; hand the original tree back.
        return tree;
    }

    // The accessed element must lie entirely within the SIMD struct.
    assert(simdSize >= ((index + 1) * genTypeSize(baseType)));

    GenTree*   indexNode   = gtNewIconNode(index);
    GenTreePtr getItemNode = gtNewSIMDNode(baseType, simdStruct, indexNode, SIMDIntrinsicGetItem, baseType, simdSize);
#ifdef DEBUG
    getItemNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif

    return getItemNode;
}

//------------------------------------------------------------------------
// fgMorphFieldAssignToSIMDIntrinsicSet:
//   If 'tree' is an assignment whose destination is a field of a SIMD struct (as determined
//   by getSIMDStructFromField), rewrite it in place as an assignment of a
//   SIMDIntrinsicSetX/Y/Z/W node to a clone of the SIMD struct node.
//
// Arguments:
//    tree - A GT_ASG node.
//
// Return Value:
//    Always 'tree'. When the destination is a SIMD struct field, its operands have been
//    replaced: op1 becomes a clone of the struct node and op2 becomes the SIMD set node
//    (which takes the original struct node and the original op2 as operands).
//    Otherwise 'tree' is left unchanged.
//
// Notes:
//    Only field indices 0 through 3 have a corresponding set intrinsic; any other index
//    triggers a noway_assert.
//

GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
{
    assert(tree->OperGet() == GT_ASG);

    GenTreePtr dstNode = tree->gtGetOp1();
    GenTreePtr srcNode = tree->gtGetOp2();

    unsigned  index    = 0;
    var_types baseType = TYP_UNKNOWN;
    unsigned  simdSize = 0;

    GenTreePtr simdOp1Struct = getSIMDStructFromField(dstNode, &baseType, &index, &simdSize);
    if (simdOp1Struct == nullptr)
    {
        // The destination is not a SIMD struct field; nothing to rewrite.
        return tree;
    }

    // The assigned element must lie entirely within the SIMD struct.
    assert(simdSize >= ((index + 1) * genTypeSize(baseType)));

    // Map the field index to the matching set intrinsic.
    static const SIMDIntrinsicID setIntrinsics[] = {SIMDIntrinsicSetX, SIMDIntrinsicSetY, SIMDIntrinsicSetZ,
                                                    SIMDIntrinsicSetW};

    SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
    if (index < (sizeof(setIntrinsics) / sizeof(setIntrinsics[0])))
    {
        simdIntrinsicID = setIntrinsics[index];
    }
    else
    {
        noway_assert(!"There is no set intrinsic for index bigger than 3");
    }

    // The original struct node becomes an operand of the SIMD node, so the assignment
    // target is a clone of it.
    GenTreePtr target = gtClone(simdOp1Struct);
    assert(target != nullptr);

    tree->gtOp.gtOp2 = gtNewSIMDNode(target->gtType, simdOp1Struct, srcNode, simdIntrinsicID, baseType, simdSize);
    tree->gtOp.gtOp1 = target;
#ifdef DEBUG
    tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif

    return tree;
}

#endif // FEATURE_SIMD

/*****************************************************************************
 *
 *  Transform the given GTK_SMPOP tree for code generation.
 */

#ifdef _PREFAST_
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
{
    // this extra scope is a workaround for a gcc bug
    // the inline destructor for ALLOCA_CHECK confuses the control
    // flow and gcc thinks that the function never returns
    {
        ALLOCA_CHECK();
        noway_assert(tree->OperKind() & GTK_SMPOP);

        /* The steps in this function are :
           o Perform required preorder processing
           o Process the first, then second operand, if any
           o Perform required postorder morphing
           o Perform optional postorder morphing if optimizing
         */

        bool isQmarkColon = false;

#if LOCAL_ASSERTION_PROP
        AssertionIndex origAssertionCount = DUMMY_INIT(0);
        AssertionDsc*  origAssertionTab   = DUMMY_INIT(NULL);

        AssertionIndex thenAssertionCount = DUMMY_INIT(0);
        AssertionDsc*  thenAssertionTab   = DUMMY_INIT(NULL);
#endif

        if (fgGlobalMorph)
        {
#if !FEATURE_STACK_FP_X87
            tree = fgMorphForRegisterFP(tree);
#endif
        }

        genTreeOps oper = tree->OperGet();
        var_types  typ  = tree->TypeGet();
        GenTreePtr op1  = tree->gtOp.gtOp1;
        GenTreePtr op2  = tree->gtGetOp2IfPresent();

        /*-------------------------------------------------------------------------
         * First do any PRE-ORDER processing
         */

        switch (oper)
        {
            // Some arithmetic operators need to use a helper call to the EE
            int helper;

            case GT_ASG:
                tree = fgDoNormalizeOnStore(tree);
                /* fgDoNormalizeOnStore can change op2 */
                noway_assert(op1 == tree->gtOp.gtOp1);
                op2 = tree->gtOp.gtOp2;

#ifdef FEATURE_SIMD
                {
                    // We should check whether op2 should be assigned to a SIMD field or not.
                    // If it is, we should translate the tree to a SIMD intrinsic.
                    assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
                    GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
                    typ                = tree->TypeGet();
                    op1                = tree->gtGetOp1();
                    op2                = tree->gtGetOp2();
#ifdef DEBUG
                    assert((tree == newTree) && (tree->OperGet() == oper));
                    if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
                    {
                        tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
                    }
#endif // DEBUG
                }
#endif

                __fallthrough;

            case GT_ASG_ADD:
            case GT_ASG_SUB:
            case GT_ASG_MUL:
            case GT_ASG_DIV:
            case GT_ASG_MOD:
            case GT_ASG_UDIV:
            case GT_ASG_UMOD:
            case GT_ASG_OR:
            case GT_ASG_XOR:
            case GT_ASG_AND:
            case GT_ASG_LSH:
            case GT_ASG_RSH:
            case GT_ASG_RSZ:
            case GT_CHS:

                // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
                // Previously, the "lhs" (addr) of a block op was CSE'd.  So, to duplicate the former
                // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type)
                // TODO-1stClassStructs: improve this.
                if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
                {
                    op1->gtFlags |= GTF_DONT_CSE;
                }
                break;

            case GT_ADDR:

                /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
                op1->gtFlags |= GTF_DONT_CSE;
                break;

            case GT_QMARK:
            case GT_JTRUE:

                noway_assert(op1);

                if (op1->OperKind() & GTK_RELOP)
                {
                    noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
                    /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
                       not need to materialize the result as a 0 or 1. */

                    /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
                    op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);

                    // Request that the codegen for op1 sets the condition flags
                    // when it generates the code for op1.
                    //
                    // Codegen for op1 must set the condition flags if
                    // this method returns true.
                    //
                    op1->gtRequestSetFlags();
                }
                else
                {
                    GenTreePtr effOp1 = op1->gtEffectiveVal();
                    noway_assert((effOp1->gtOper == GT_CNS_INT) &&
                                 (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
                }
                break;

            case GT_COLON:
#if LOCAL_ASSERTION_PROP
                if (optLocalAssertionProp)
#endif
                {
                    isQmarkColon = true;
                }
                break;

            case GT_INDEX:
                return fgMorphArrayIndex(tree);

            case GT_CAST:
                return fgMorphCast(tree);

            case GT_MUL:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    /* For (long)int1 * (long)int2, we don't actually do the
                       casts, and just multiply the 32 bit values, which will
                       give us the 64 bit result in edx:eax */

                    noway_assert(op2);
                    if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
                         genActualType(op1->CastFromType()) == TYP_INT &&
                         genActualType(op2->CastFromType()) == TYP_INT) &&
                        !op1->gtOverflow() && !op2->gtOverflow())
                    {
                        // The casts have to be of the same signedness.
                        if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
                        {
                            // We see if we can force an int constant to change its signedness
                            GenTreePtr constOp;
                            if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
                                constOp = op1;
                            else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
                                constOp = op2;
                            else
                                goto NO_MUL_64RSLT;

                            if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
                                constOp->gtFlags ^= GTF_UNSIGNED;
                            else
                                goto NO_MUL_64RSLT;
                        }

                        // The only combination that can overflow
                        if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
                            goto NO_MUL_64RSLT;

                        /* Remaining combinations can never overflow during long mul. */

                        tree->gtFlags &= ~GTF_OVERFLOW;

                        /* Do unsigned mul only if the casts were unsigned */

                        tree->gtFlags &= ~GTF_UNSIGNED;
                        tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;

                        /* Since we are committing to GTF_MUL_64RSLT, we don't want
                           the casts to be folded away. So morph the castees directly */

                        op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
                        op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);

                        // Propagate side effect flags up the tree
                        op1->gtFlags &= ~GTF_ALL_EFFECT;
                        op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
                        op2->gtFlags &= ~GTF_ALL_EFFECT;
                        op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

                        // If the GT_MUL can be altogether folded away, we should do that.

                        if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
                            opts.OptEnabled(CLFLG_CONSTANTFOLD))
                        {
                            tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
                            tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
                            noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
                            tree = gtFoldExprConst(tree);
                            noway_assert(tree->OperIsConst());
                            return tree;
                        }

                        tree->gtFlags |= GTF_MUL_64RSLT;

                        // If op1 and op2 are unsigned casts, we need to do an unsigned mult
                        tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);

                        // Insert GT_NOP nodes for the cast operands so that they do not get folded
                        // And propagate the new flags. We don't want to CSE the casts because
                        // codegen expects GTF_MUL_64RSLT muls to have a certain layout.

                        if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
                        {
                            op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
                            op1->gtFlags &= ~GTF_ALL_EFFECT;
                            op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
                        }

                        if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
                        {
                            op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
                            op2->gtFlags &= ~GTF_ALL_EFFECT;
                            op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
                        }

                        op1->gtFlags |= GTF_DONT_CSE;
                        op2->gtFlags |= GTF_DONT_CSE;

                        tree->gtFlags &= ~GTF_ALL_EFFECT;
                        tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);

                        goto DONE_MORPHING_CHILDREN;
                    }
                    else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
                    {
                    NO_MUL_64RSLT:
                        if (tree->gtOverflow())
                            helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
                        else
                            helper = CORINFO_HELP_LMUL;

                        goto USE_HELPER_FOR_ARITH;
                    }
                    else
                    {
                        /* We are seeing this node again. We have decided to use
                           GTF_MUL_64RSLT, so leave it alone. */

                        assert(tree->gtIsValid64RsltMul());
                    }
                }
#endif // !_TARGET_64BIT_
                break;

            case GT_DIV:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = CORINFO_HELP_LDIV;
                    goto USE_HELPER_FOR_ARITH;
                }

#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
                {
                    helper = CORINFO_HELP_DIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#endif
#endif // !_TARGET_64BIT_

#ifndef LEGACY_BACKEND
                if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
                {
                    op2 = gtFoldExprConst(op2);
                }
#endif // !LEGACY_BACKEND
                break;

            case GT_UDIV:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = CORINFO_HELP_ULDIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
                {
                    helper = CORINFO_HELP_UDIV;
                    goto USE_HELPER_FOR_ARITH;
                }
#endif
#endif // _TARGET_64BIT_
                break;

            case GT_MOD:

                if (varTypeIsFloating(typ))
                {
                    helper = CORINFO_HELP_DBLREM;
                    noway_assert(op2);
                    if (op1->TypeGet() == TYP_FLOAT)
                    {
                        if (op2->TypeGet() == TYP_FLOAT)
                        {
                            helper = CORINFO_HELP_FLTREM;
                        }
                        else
                        {
                            tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
                        }
                    }
                    else if (op2->TypeGet() == TYP_FLOAT)
                    {
                        tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
                    }
                    goto USE_HELPER_FOR_ARITH;
                }

                // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
                // A similar optimization for signed mod will not work for a negative perfectly divisible
                // HI-word. To make it correct, we would need to divide without the sign and then flip the
                // result sign after mod. This requires 18 opcodes + flow making it not worthy to inline.
                goto ASSIGN_HELPER_FOR_MOD;

            case GT_UMOD:

#ifdef _TARGET_ARMARCH_
//
// Note for _TARGET_ARMARCH_ we don't have  a remainder instruction, so we don't do this optimization
//
#else  // _TARGET_XARCH
                /* If this is an unsigned long mod with op2 which is a cast to long from a
                   constant int, then don't morph to a call to the helper.  This can be done
                   faster inline using idiv.
                */

                noway_assert(op2);
                if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
                    ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
                    ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
                {
                    if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
                        op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
                        op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
                        (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
                    {
                        tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
                        noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
                    }

                    if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
                        op2->gtIntConCommon.LngValue() <= 0x3fffffff)
                    {
                        tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
                        noway_assert(op1->TypeGet() == TYP_LONG);

                        // Update flags for op1 morph
                        tree->gtFlags &= ~GTF_ALL_EFFECT;

                        tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant

                        // If op1 is a constant, then do constant folding of the division operator
                        if (op1->gtOper == GT_CNS_NATIVELONG)
                        {
                            tree = gtFoldExpr(tree);
                        }
                        return tree;
                    }
                }
#endif // _TARGET_XARCH

            ASSIGN_HELPER_FOR_MOD:

                // For "val % 1", return 0 if op1 doesn't have any side effects
                // and we are not in the CSE phase, we cannot discard 'tree'
                // because it may contain CSE expressions that we haven't yet examined.
                //
                if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
                {
                    if (op2->IsIntegralConst(1))
                    {
                        GenTreePtr zeroNode = gtNewZeroConNode(typ);
#ifdef DEBUG
                        zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                        DEBUG_DESTROY_NODE(tree);
                        return zeroNode;
                    }
                }

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
                    goto USE_HELPER_FOR_ARITH;
                }

#if USE_HELPERS_FOR_INT_DIV
                if (typ == TYP_INT)
                {
                    if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
                    {
                        helper = CORINFO_HELP_UMOD;
                        goto USE_HELPER_FOR_ARITH;
                    }
                    else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
                    {
                        helper = CORINFO_HELP_MOD;
                        goto USE_HELPER_FOR_ARITH;
                    }
                }
#endif
#endif // !_TARGET_64BIT_

#ifndef LEGACY_BACKEND
                if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
                {
                    op2 = gtFoldExprConst(op2);
                }

#ifdef _TARGET_ARM64_

                // For ARM64 we don't have a remainder instruction,
                // The architecture manual suggests the following transformation to
                // generate code for such operator:
                //
                // a % b = a - (a / b) * b;
                //
                // NOTE: we should never need to perform this transformation when remorphing, since global morphing
                //       should already have done so and we do not introduce new modulus nodes in later phases.
                assert(!optValnumCSE_phase);
                tree = fgMorphModToSubMulDiv(tree->AsOp());
                op1  = tree->gtOp.gtOp1;
                op2  = tree->gtOp.gtOp2;
#else  //_TARGET_ARM64_
                // If b is not a power of 2 constant then lowering replaces a % b
                // with a - (a / b) * b and applies magic division optimization to
                // a / b. The code may already contain an a / b expression (e.g.
                // x = a / 10; y = a % 10;) and then we end up with redundant code.
                // If we convert % to / here we give CSE the opportunity to eliminate
                // the redundant division. If there's no redundant division then
                // nothing is lost, lowering would have done this transform anyway.

                if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
                {
                    ssize_t divisorValue    = op2->AsIntCon()->IconValue();
                    size_t  absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
                                                                           : static_cast<size_t>(abs(divisorValue));

                    if (!isPow2(absDivisorValue))
                    {
                        tree = fgMorphModToSubMulDiv(tree->AsOp());
                        op1  = tree->gtOp.gtOp1;
                        op2  = tree->gtOp.gtOp2;
                    }
                }
#endif //_TARGET_ARM64_
#endif // !LEGACY_BACKEND
                break;

            USE_HELPER_FOR_ARITH:
            {
                /* We have to morph these arithmetic operations into helper calls
                   before morphing the arguments (preorder), else the arguments
                   won't get correct values of fgPtrArgCntCur.
                   However, try to fold the tree first in case we end up with a
                   simple node which won't need a helper call at all */

                noway_assert(tree->OperIsBinary());

                GenTreePtr oldTree = tree;

                tree = gtFoldExpr(tree);

                // Were we able to fold it ?
                // Note that gtFoldExpr may return a non-leaf even if successful
                // e.g. for something like "expr / 1" - see also bug #290853
                if (tree->OperIsLeaf() || (oldTree != tree))

                {
                    return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
                }

                // Did we fold it into a comma node with throw?
                if (tree->gtOper == GT_COMMA)
                {
                    noway_assert(fgIsCommaThrow(tree));
                    return fgMorphTree(tree);
                }
            }
                return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));

            case GT_RETURN:
                // normalize small integer return values
                if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
                    (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
                {
                    // Small-typed return values are normalized by the callee
                    op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);

                    // Propagate GTF_COLON_COND
                    op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);

                    tree->gtOp.gtOp1 = fgMorphCast(op1);

                    // Propagate side effect flags
                    tree->gtFlags &= ~GTF_ALL_EFFECT;
                    tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);

                    return tree;
                }
                break;

            case GT_EQ:
            case GT_NE:

                // Check for typeof(...) == obj.GetType()
                // Also check for typeof(...) == typeof(...)
                // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
                // type handles and instances of System.Type
                // If this invariant is ever broken, the optimization will need updating
                CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef LEGACY_BACKEND
                if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
                    ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
                     (op1->gtCall.gtCallType == CT_HELPER)) &&
                    ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
                     (op2->gtCall.gtCallType == CT_HELPER)))
#else
                if ((((op1->gtOper == GT_INTRINSIC) &&
                      (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
                     ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
                    (((op2->gtOper == GT_INTRINSIC) &&
                      (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
                     ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
#endif
                {
                    GenTreePtr pGetClassFromHandle;
                    GenTreePtr pGetType;

#ifdef LEGACY_BACKEND
                    bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
                    bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
#else
                    bool bOp1ClassFromHandle =
                        op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
                    bool bOp2ClassFromHandle =
                        op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
#endif

                    // Optimize typeof(...) == typeof(...)
                    // Typically this occurs in generic code that attempts a type switch
                    // e.g. typeof(T) == typeof(int)

                    if (bOp1ClassFromHandle && bOp2ClassFromHandle)
                    {
                        GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
                        GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;

                        GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);

                        compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);

                        // Morph and return
                        return fgMorphTree(compare);
                    }
                    else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
                    {
                        //
                        // Now check for GetClassFromHandle(handle) == obj.GetType()
                        //

                        if (bOp1ClassFromHandle)
                        {
                            pGetClassFromHandle = tree->gtOp.gtOp1;
                            pGetType            = op2;
                        }
                        else
                        {
                            pGetClassFromHandle = tree->gtOp.gtOp2;
                            pGetType            = op1;
                        }

                        GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
                        GenTreePtr pConstLiteral               = pGetClassFromHandleArgument;

                        // Unwrap GT_NOP node used to prevent constant folding
                        if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
                        {
                            pConstLiteral = pConstLiteral->gtOp.gtOp1;
                        }

                        // In the ngen case, we have to go thru an indirection to get the right handle.
                        if (pConstLiteral->gtOper == GT_IND)
                        {
                            pConstLiteral = pConstLiteral->gtOp.gtOp1;
                        }
#ifdef LEGACY_BACKEND

                        if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
                            info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
                                CORINFO_INTRINSIC_Object_GetType &&
#else
                        if ((pGetType->gtOper == GT_INTRINSIC) &&
                            (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
#endif
                            pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
                        {
                            CORINFO_CLASS_HANDLE clsHnd =
                                CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);

                            if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
                            {
                                // Method Table tree
                                CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef LEGACY_BACKEND
                                GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
#else
                                GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
#endif
                                objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
                                compCurBB->bbFlags |= BBF_HAS_VTABREF;
                                optMethodFlags |= OMF_HAS_VTABLEREF;

                                // Method table constant
                                GenTreePtr cnsMT = pGetClassFromHandleArgument;

                                GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);

                                compare->gtFlags |=
                                    tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);

                                // Morph and return
                                return fgMorphTree(compare);
                            }
                        }
                    }
                }
                fgMorphRecognizeBoxNullable(tree);
                op1 = tree->gtOp.gtOp1;
                op2 = tree->gtGetOp2IfPresent();

                break;

#ifdef _TARGET_ARM_
            case GT_INTRINSIC:
                if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
                {
                    switch (tree->TypeGet())
                    {
                        case TYP_DOUBLE:
                            return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
                        case TYP_FLOAT:
                            return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
                        default:
                            unreached();
                    }
                }
                break;
#endif

            default:
                break;
        }

#if !CPU_HAS_FP_SUPPORT
        tree = fgMorphToEmulatedFP(tree);
#endif

        /* Could this operator throw an exception? */
        if (fgGlobalMorph && tree->OperMayThrow())
        {
            if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
            {
                /* Mark the tree node as potentially throwing an exception */
                tree->gtFlags |= GTF_EXCEPT;
            }
        }

        /*-------------------------------------------------------------------------
         * Process the first operand, if any
         */

        if (op1)
        {

#if LOCAL_ASSERTION_PROP
            // If we are entering the "then" part of a Qmark-Colon we must
            // save the state of the current copy assignment table
            // so that we can restore this state when entering the "else" part
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                if (optAssertionCount)
                {
                    noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
                    unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
                    origAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
                    origAssertionCount = optAssertionCount;
                    memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
                }
                else
                {
                    origAssertionCount = 0;
                    origAssertionTab   = nullptr;
                }
            }
#endif // LOCAL_ASSERTION_PROP

            // We might need a new MorphAddressContext context.  (These are used to convey
            // parent context about how addresses being calculated will be used; see the
            // specification comment for MorphAddrContext for full details.)
            // Assume it's an Ind context to start.
            MorphAddrContext  subIndMac1(MACK_Ind);
            MorphAddrContext* subMac1 = mac;
            if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
            {
                switch (tree->gtOper)
                {
                    case GT_ADDR:
                        if (subMac1 == nullptr)
                        {
                            subMac1         = &subIndMac1;
                            subMac1->m_kind = MACK_Addr;
                        }
                        break;
                    case GT_COMMA:
                        // In a comma, the incoming context only applies to the rightmost arg of the
                        // comma list.  The left arg (op1) gets a fresh context.
                        subMac1 = nullptr;
                        break;
                    case GT_OBJ:
                    case GT_BLK:
                    case GT_DYN_BLK:
                    case GT_IND:
                        subMac1 = &subIndMac1;
                        break;
                    default:
                        break;
                }
            }

            // For additions, if we're in an IND context keep track of whether
            // all offsets added to the address are constant, and their sum.
            if (tree->gtOper == GT_ADD && subMac1 != nullptr)
            {
                assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
                GenTreePtr otherOp = tree->gtOp.gtOp2;
                // Is the other operator a constant?
                if (otherOp->IsCnsIntOrI())
                {
                    ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
                    totalOffset += otherOp->gtIntConCommon.IconValue();
                    if (totalOffset.IsOverflow())
                    {
                        // We will consider an offset so large as to overflow as "not a constant" --
                        // we will do a null check.
                        subMac1->m_allConstantOffsets = false;
                    }
                    else
                    {
                        subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
                    }
                }
                else
                {
                    subMac1->m_allConstantOffsets = false;
                }
            }

            // If gtOp1 is a GT_FIELD, we need to pass down the mac if
            // its parent is GT_ADDR, since the address of the field
            // is part of an ongoing address computation. Otherwise
            // op1 represents the value of the field and so any address
            // calculations it does are in a new context.
            if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
            {
                subMac1 = nullptr;

                // The impact of this field's value to any ongoing
                // address computation is handled below when looking
                // at op2.
            }

            tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);

#if LOCAL_ASSERTION_PROP
            // If we are exiting the "then" part of a Qmark-Colon we must
            // save the state of the current copy assignment table
            // so that we can merge this state with the "else" part exit
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                if (optAssertionCount)
                {
                    noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
                    unsigned tabSize   = optAssertionCount * sizeof(AssertionDsc);
                    thenAssertionTab   = (AssertionDsc*)ALLOCA(tabSize);
                    thenAssertionCount = optAssertionCount;
                    memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
                }
                else
                {
                    thenAssertionCount = 0;
                    thenAssertionTab   = nullptr;
                }
            }
#endif // LOCAL_ASSERTION_PROP

            /* Morphing along with folding and inlining may have changed the
             * side effect flags, so we have to reset them
             *
             * NOTE: Don't reset the exception flags on nodes that may throw */

            noway_assert(tree->gtOper != GT_CALL);

            if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
            {
                tree->gtFlags &= ~GTF_CALL;
            }

            if (!tree->OperMayThrow())
            {
                tree->gtFlags &= ~GTF_EXCEPT;
            }

            /* Propagate the new flags */
            tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);

            // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
            // Similarly for clsVar
            if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
            {
                tree->gtFlags &= ~GTF_GLOB_REF;
            }
        } // if (op1)

        /*-------------------------------------------------------------------------
         * Process the second operand, if any
         */

        if (op2)
        {

#if LOCAL_ASSERTION_PROP
            // If we are entering the "else" part of a Qmark-Colon we must
            // reset the state of the current copy assignment table
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                optAssertionReset(0);
                if (origAssertionCount)
                {
                    size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
                    memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
                    optAssertionReset(origAssertionCount);
                }
            }
#endif // LOCAL_ASSERTION_PROP

            // We might need a new MorphAddressContext context to use in evaluating op2.
            // (These are used to convey parent context about how addresses being calculated
            // will be used; see the specification comment for MorphAddrContext for full details.)
            // Assume it's an Ind context to start.
            switch (tree->gtOper)
            {
                case GT_ADD:
                    if (mac != nullptr && mac->m_kind == MACK_Ind)
                    {
                        GenTreePtr otherOp = tree->gtOp.gtOp1;
                        // Is the other operator a constant?
                        if (otherOp->IsCnsIntOrI())
                        {
                            mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
                        }
                        else
                        {
                            mac->m_allConstantOffsets = false;
                        }
                    }
                    break;
                default:
                    break;
            }

            // If gtOp2 is a GT_FIELD, we must be taking its value,
            // so it should evaluate its address in a new context.
            if (op2->gtOper == GT_FIELD)
            {
                // The impact of this field's value to any ongoing
                // address computation is handled above when looking
                // at op1.
                mac = nullptr;
            }

            tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);

            /* Propagate the side effect flags from op2 */

            tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);

#if LOCAL_ASSERTION_PROP
            // If we are exiting the "else" part of a Qmark-Colon we must
            // merge the state of the current copy assignment table with
            // that of the exit of the "then" part.
            if (isQmarkColon)
            {
                noway_assert(optLocalAssertionProp);
                // If either exit table has zero entries then
                // the merged table also has zero entries
                if (optAssertionCount == 0 || thenAssertionCount == 0)
                {
                    optAssertionReset(0);
                }
                else
                {
                    size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
                    if ((optAssertionCount != thenAssertionCount) ||
                        (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
                    {
                        // Yes they are different so we have to find the merged set
                        // Iterate over the copy asgn table removing any entries
                        // that do not have an exact match in the thenAssertionTab
                        AssertionIndex index = 1;
                        while (index <= optAssertionCount)
                        {
                            AssertionDsc* curAssertion = optGetAssertion(index);

                            for (unsigned j = 0; j < thenAssertionCount; j++)
                            {
                                AssertionDsc* thenAssertion = &thenAssertionTab[j];

                                // Do the left sides match?
                                if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
                                    (curAssertion->assertionKind == thenAssertion->assertionKind))
                                {
                                    // Do the right sides match?
                                    if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
                                        (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
                                    {
                                        goto KEEP;
                                    }
                                    else
                                    {
                                        goto REMOVE;
                                    }
                                }
                            }
                        //
                        // If we fall out of the loop above then we didn't find
                        // any matching entry in the thenAssertionTab so it must
                        // have been killed on that path so we remove it here
                        //
                        REMOVE:
                            // The assertion at 'index' is to be removed
                            CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef DEBUG
                            if (verbose)
                            {
                                printf("The QMARK-COLON ");
                                printTreeID(tree);
                                printf(" removes assertion candidate #%d\n", index);
                            }
#endif
                            optAssertionRemove(index);
                            continue;
                        KEEP:
                            // The assertion at 'index' is to be kept
                            index++;
                        }
                    }
                }
            }
#endif    // LOCAL_ASSERTION_PROP
        } // if (op2)

    DONE_MORPHING_CHILDREN:

/*-------------------------------------------------------------------------
 * Now do POST-ORDER processing
 */

#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
        // Variable shifts of a long end up being helper calls, so mark the tree as such. This
        // is potentially too conservative, since they'll get treated as having side effects.
        // It is important to mark them as calls so if they are part of an argument list,
        // they will get sorted and processed properly (for example, it is important to handle
        // all nested calls before putting struct arguments in the argument registers). We
        // could mark the trees just before argument processing, but it would require a full
        // tree walk of the argument tree, so we just do it here, instead, even though we'll
        // mark non-argument trees (that will still get converted to calls, anyway).
        if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
        {
            tree->gtFlags |= GTF_CALL;
        }
#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_

        if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
            (op2 && !varTypeIsGC(op2->TypeGet())))
        {
            // The tree is really not GC but was marked as such. Now that the
            // children have been unmarked, unmark the tree too.

            // Remember that GT_COMMA inherits its type only from op2
            if (tree->gtOper == GT_COMMA)
            {
                tree->gtType = genActualType(op2->TypeGet());
            }
            else
            {
                tree->gtType = genActualType(op1->TypeGet());
            }
        }

        GenTreePtr oldTree = tree;

        GenTreePtr qmarkOp1 = nullptr;
        GenTreePtr qmarkOp2 = nullptr;

        if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
        {
            qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
            qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
        }

        // Try to fold it; maybe we get lucky.
        tree = gtFoldExpr(tree);

        if (oldTree != tree)
        {
            /* if gtFoldExpr returned op1 or op2 then we are done */
            if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
            {
                return tree;
            }

            /* If we created a comma-throw tree then we need to morph op1 */
            if (fgIsCommaThrow(tree))
            {
                tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
                fgMorphTreeDone(tree);
                return tree;
            }

            return tree;
        }
        else if (tree->OperKind() & GTK_CONST)
        {
            return tree;
        }

        /* gtFoldExpr could have used setOper to change the oper */
        oper = tree->OperGet();
        typ  = tree->TypeGet();

        /* gtFoldExpr could have changed op1 and op2 */
        op1 = tree->gtOp.gtOp1;
        op2 = tree->gtGetOp2IfPresent();

        // Do we have an integer compare operation?
        //
        if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
        {
            // Are we comparing against zero?
            //
            if (op2->IsIntegralConst(0))
            {
                // Request that the codegen for op1 sets the condition flags
                // when it generates the code for op1.
                //
                // Codegen for op1 must set the condition flags if
                // this method returns true.
                //
                op1->gtRequestSetFlags();
            }
        }
        /*-------------------------------------------------------------------------
         * Perform the required oper-specific postorder morphing
         */

        GenTreePtr           temp;
        GenTreePtr           cns1, cns2;
        GenTreePtr           thenNode;
        GenTreePtr           elseNode;
        size_t               ival1, ival2;
        GenTreePtr           lclVarTree;
        GenTreeLclVarCommon* lclVarCmnTree;
        FieldSeqNode*        fieldSeq = nullptr;

        switch (oper)
        {
            case GT_ASG:

                lclVarTree = fgIsIndirOfAddrOfLocal(op1);
                if (lclVarTree != nullptr)
                {
                    lclVarTree->gtFlags |= GTF_VAR_DEF;
                }

                if (op1->gtEffectiveVal()->OperIsConst())
                {
                    op1              = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
                    tree->gtOp.gtOp1 = op1;
                }

                /* If we are storing a small type, we might be able to omit a cast */
                if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
                {
                    if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
                    {
                        var_types castType = op2->CastToType();

                        // If we are performing a narrowing cast and
                        // castType is larger or the same as op1's type
                        // then we can discard the cast.

                        if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
                        {
                            tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
                        }
                    }
                    else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
                    {
                        /* We don't need to zero extend the setcc instruction */
                        op2->gtType = TYP_BYTE;
                    }
                }
                // If we introduced a CSE we may need to undo the optimization above
                // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
                // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
                else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
                {
                    unsigned   varNum = op1->gtLclVarCommon.gtLclNum;
                    LclVarDsc* varDsc = &lvaTable[varNum];

                    /* We again need to zero extend the setcc instruction */
                    op2->gtType = varDsc->TypeGet();
                }
                fgAssignSetVarDef(tree);

                __fallthrough;

            case GT_ASG_ADD:
            case GT_ASG_SUB:
            case GT_ASG_MUL:
            case GT_ASG_DIV:
            case GT_ASG_MOD:
            case GT_ASG_UDIV:
            case GT_ASG_UMOD:
            case GT_ASG_OR:
            case GT_ASG_XOR:
            case GT_ASG_AND:
            case GT_ASG_LSH:
            case GT_ASG_RSH:
            case GT_ASG_RSZ:

                /* We can't CSE the LHS of an assignment */
                /* We also must set in the pre-morphing phase, otherwise assertionProp doesn't see it */
                if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
                {
                    op1->gtFlags |= GTF_DONT_CSE;
                }
                break;

            case GT_EQ:
            case GT_NE:

                /* Make sure we're allowed to do this */

                if (optValnumCSE_phase)
                {
                    // It is not safe to reorder/delete CSE's
                    break;
                }

                cns2 = op2;

                /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */

                if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
                {
                    op1 = tree->gtOp.gtOp1;

                    /* Since this can occur repeatedly we use a while loop */

                    while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
                           (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
                           (op1->gtOverflow() == false))
                    {
                        /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */

                        ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
                        ival2 = cns2->gtIntCon.gtIconVal;

                        if (op1->gtOper == GT_ADD)
                        {
                            ival2 -= ival1;
                        }
                        else
                        {
                            ival2 += ival1;
                        }
                        cns2->gtIntCon.gtIconVal = ival2;

#ifdef _TARGET_64BIT_
                        // we need to properly re-sign-extend or truncate as needed.
                        cns2->AsIntCon()->TruncateOrSignExtend32();
#endif // _TARGET_64BIT_

                        op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
                    }
                }

                //
                // Here we look for the following tree
                //
                //                        EQ/NE
                //                        /  \
                //                      op1   CNS 0/1
                //
                ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1

                // cast to unsigned allows test for both 0 and 1
                if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
                {
                    ival2 = (size_t)cns2->gtIntConCommon.IconValue();
                }
                else // cast to UINT64 allows test for both 0 and 1
                    if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
                {
                    ival2 = (size_t)cns2->gtIntConCommon.LngValue();
                }

                if (ival2 != INT_MAX)
                {
                    // If we don't have a comma and relop, we can't do this optimization
                    //
                    if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
                    {
                        // Here we look for the following transformation
                        //
                        //                  EQ/NE                    Possible REVERSE(RELOP)
                        //                  /  \                           /      \
                        //               COMMA CNS 0/1             ->   COMMA   relop_op2
                        //              /   \                          /    \
                        //             x  RELOP                       x     relop_op1
                        //               /    \
                        //         relop_op1  relop_op2
                        //
                        //
                        //
                        GenTreePtr comma = op1;
                        GenTreePtr relop = comma->gtOp.gtOp2;

                        GenTreePtr relop_op1 = relop->gtOp.gtOp1;

                        bool reverse = ((ival2 == 0) == (oper == GT_EQ));

                        if (reverse)
                        {
                            gtReverseCond(relop);
                        }

                        relop->gtOp.gtOp1 = comma;
                        comma->gtOp.gtOp2 = relop_op1;

                        // Comma now has fewer nodes underneath it, so we need to regenerate its flags
                        comma->gtFlags &= ~GTF_ALL_EFFECT;
                        comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
                        comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;

                        noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
                        noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
                        relop->gtFlags |=
                            tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);

                        return relop;
                    }

                    if (op1->gtOper == GT_COMMA)
                    {
                        // Here we look for the following tree
                        // and when the LCL_VAR is a temp we can fold the tree:
                        //
                        //                        EQ/NE                  EQ/NE
                        //                        /  \                   /  \
                        //                     COMMA  CNS 0/1  ->     RELOP CNS 0/1
                        //                     /   \                   / \
                        //                   ASG  LCL_VAR
                        //                  /  \
                        //           LCL_VAR   RELOP
                        //                      / \
                        //

                        GenTreePtr asg = op1->gtOp.gtOp1;
                        GenTreePtr lcl = op1->gtOp.gtOp2;

                        /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
                        if (asg->gtOper != GT_ASG)
                        {
                            goto SKIP;
                        }

                        /* The right side of the comma must be a LCL_VAR temp */
                        if (lcl->gtOper != GT_LCL_VAR)
                        {
                            goto SKIP;
                        }

                        unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
                        noway_assert(lclNum < lvaCount);

                        /* If the LCL_VAR is not a temp then bail, a temp has a single def */
                        if (!lvaTable[lclNum].lvIsTemp)
                        {
                            goto SKIP;
                        }

#if FEATURE_ANYCSE
                        /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
                        // Fix 383856 X86/ARM ILGEN
                        if (lclNumIsCSE(lclNum))
                        {
                            goto SKIP;
                        }
#endif

                        /* The destination of the assignment must also be a LCL_VAR */
                        if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
                        {
                            goto SKIP;
                        }

                        /* Both of the LCL_VAR must match */
                        if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
                        {
                            goto SKIP;
                        }

                        /* If right side of asg is not a RELOP then skip */
                        if (!asg->gtOp.gtOp2->OperIsCompare())
                        {
                            goto SKIP;
                        }

                        LclVarDsc* varDsc = lvaTable + lclNum;

                        /* Set op1 to the right side of asg, (i.e. the RELOP) */
                        op1 = asg->gtOp.gtOp2;

                        DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
                        DEBUG_DESTROY_NODE(lcl);

                        /* This local variable should never be used again */
                        // <BUGNUM>
                        // VSW 184221: Make RefCnt to zero to indicate that this local var
                        // is not used any more. (Keep the lvType as is.)
                        // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
                        // And then emitter::emitEndCodeGen will assert in the following line:
                        //        noway_assert( dsc->lvTracked);
                        // </BUGNUM>
                        noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
                                     varDsc->lvRefCnt == 2    // Or, we assume this tmp should only be used here,
                                                              // and it only shows up twice.
                                     );
                        lvaTable[lclNum].lvRefCnt = 0;
                        lvaTable[lclNum].lvaResetSortAgainFlag(this);
                    }

                    if (op1->OperIsCompare())
                    {
                        // Here we look for the following tree
                        //
                        //                        EQ/NE           ->      RELOP/!RELOP
                        //                        /  \                       /    \
                        //                     RELOP  CNS 0/1
                        //                     /   \
                        //
                        // Note that we will remove/destroy the EQ/NE node and move
                        // the RELOP up into its location.

                        /* Here we reverse the RELOP if necessary */

                        bool reverse = ((ival2 == 0) == (oper == GT_EQ));

                        if (reverse)
                        {
                            gtReverseCond(op1);
                        }

                        /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
                        op1->gtType = tree->gtType;

                        noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
                        op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);

                        DEBUG_DESTROY_NODE(tree);
                        return op1;
                    }

                    //
                    // Now we check for a compare with the result of an '&' operator
                    //
                    // Here we look for the following transformation:
                    //
                    //                        EQ/NE                  EQ/NE
                    //                        /  \                   /  \
                    //                      AND   CNS 0/1  ->      AND   CNS 0
                    //                     /   \                  /   \
                    //                RSZ/RSH   CNS 1            x     CNS (1 << y)
                    //                  /  \
                    //                 x   CNS_INT +y

                    if (op1->gtOper == GT_AND)
                    {
                        GenTreePtr andOp    = op1;
                        GenTreePtr rshiftOp = andOp->gtOp.gtOp1;

                        if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
                        {
                            goto SKIP;
                        }

                        if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
                        {
                            goto SKIP;
                        }

                        ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;

                        if (shiftAmount < 0)
                        {
                            goto SKIP;
                        }

                        if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
                        {
                            goto SKIP;
                        }

                        if (andOp->gtType == TYP_INT)
                        {
                            if (shiftAmount > 31)
                            {
                                goto SKIP;
                            }

                            UINT32 newAndOperand = ((UINT32)1) << shiftAmount;

                            andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;

                            // Reverse the cond if necessary
                            if (ival2 == 1)
                            {
                                gtReverseCond(tree);
                                cns2->gtIntCon.gtIconVal = 0;
                                oper                     = tree->gtOper;
                            }
                        }
                        else if (andOp->gtType == TYP_LONG)
                        {
                            if (shiftAmount > 63)
                            {
                                goto SKIP;
                            }

                            UINT64 newAndOperand = ((UINT64)1) << shiftAmount;

                            andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);

                            // Reverse the cond if necessary
                            if (ival2 == 1)
                            {
                                gtReverseCond(tree);
                                cns2->gtIntConCommon.SetLngValue(0);
                                oper = tree->gtOper;
                            }
                        }

                        andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;

                        DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
                        DEBUG_DESTROY_NODE(rshiftOp);
                    }
                } // END if (ival2 != INT_MAX)

            SKIP:
                /* Now check for compares with small constant longs that can be cast to int */

                if (!cns2->OperIsConst())
                {
                    goto COMPARE;
                }

                if (cns2->TypeGet() != TYP_LONG)
                {
                    goto COMPARE;
                }

                /* Is the constant 31 bits or smaller? */

                if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
                {
                    goto COMPARE;
                }

                /* Is the first comparand mask operation of type long ? */

                if (op1->gtOper != GT_AND)
                {
                    /* Another interesting case: cast from int */

                    if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
                        !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
                        !op1->gtOverflow())              // cannot be an overflow checking cast
                    {
                        /* Simply make this into an integer comparison */

                        tree->gtOp.gtOp1 = op1->gtCast.CastOp();
                        tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
                    }

                    goto COMPARE;
                }

                noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);

                /* Is the result of the mask effectively an INT ? */

                GenTreePtr andMask;
                andMask = op1->gtOp.gtOp2;
                if (andMask->gtOper != GT_CNS_NATIVELONG)
                {
                    goto COMPARE;
                }
                if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
                {
                    goto COMPARE;
                }

                /* Now we know that we can cast gtOp.gtOp1 of AND to int */

                op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);

                /* now replace the mask node (gtOp.gtOp2 of AND node) */

                noway_assert(andMask == op1->gtOp.gtOp2);

                ival1 = (int)andMask->gtIntConCommon.LngValue();
                andMask->SetOper(GT_CNS_INT);
                andMask->gtType             = TYP_INT;
                andMask->gtIntCon.gtIconVal = ival1;

                /* now change the type of the AND node */

                op1->gtType = TYP_INT;

                /* finally we replace the comparand */

                ival2 = (int)cns2->gtIntConCommon.LngValue();
                cns2->SetOper(GT_CNS_INT);
                cns2->gtType = TYP_INT;

                noway_assert(cns2 == op2);
                cns2->gtIntCon.gtIconVal = ival2;

                goto COMPARE;

            case GT_LT:
            case GT_LE:
            case GT_GE:
            case GT_GT:

                if ((tree->gtFlags & GTF_UNSIGNED) == 0)
                {
                    if (op2->gtOper == GT_CNS_INT)
                    {
                        cns2 = op2;
                        /* Check for "expr relop 1" */
                        if (cns2->IsIntegralConst(1))
                        {
                            /* Check for "expr >= 1" */
                            if (oper == GT_GE)
                            {
                                /* Change to "expr > 0" */
                                oper = GT_GT;
                                goto SET_OPER;
                            }
                            /* Check for "expr < 1" */
                            else if (oper == GT_LT)
                            {
                                /* Change to "expr <= 0" */
                                oper = GT_LE;
                                goto SET_OPER;
                            }
                        }
                        /* Check for "expr relop -1" */
                        else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
                        {
                            /* Check for "expr <= -1" */
                            if (oper == GT_LE)
                            {
                                /* Change to "expr < 0" */
                                oper = GT_LT;
                                goto SET_OPER;
                            }
                            /* Check for "expr > -1" */
                            else if (oper == GT_GT)
                            {
                                /* Change to "expr >= 0" */
                                oper = GT_GE;

                            SET_OPER:
                                // If we get here we should be changing 'oper'
                                assert(tree->OperGet() != oper);

                                // Keep the old ValueNumber for 'tree' as the new expr
                                // will still compute the same value as before
                                tree->SetOper(oper, GenTree::PRESERVE_VN);
                                cns2->gtIntCon.gtIconVal = 0;

                                // vnStore is null before the ValueNumber phase has run
                                if (vnStore != nullptr)
                                {
                                    // Update the ValueNumber for 'cns2', as we just changed it to 0
                                    fgValueNumberTreeConst(cns2);
                                }

                                op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
                            }
                        }
                    }
                }
                else // we have an unsigned comparison
                {
                    if (op2->IsIntegralConst(0))
                    {
                        if ((oper == GT_GT) || (oper == GT_LE))
                        {
                            // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
                            // recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
                            // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
                            // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
                            // occurs as a result of branch inversion.
                            oper = (oper == GT_LE) ? GT_EQ : GT_NE;
                            tree->SetOper(oper, GenTree::PRESERVE_VN);
                            tree->gtFlags &= ~GTF_UNSIGNED;
                        }
                    }
                }

            COMPARE:

                noway_assert(tree->OperKind() & GTK_RELOP);

                /* Check if the result of the comparison is used for a jump.
                 * If not then only the int (i.e. 32 bit) case is handled in
                 * the code generator through the (x86) "set" instructions.
                 * For the rest of the cases, the simplest way is to
                 * "simulate" the comparison with ?:
                 *
                 * On ARM, we previously used the IT instruction, but the IT instructions
                 * have mostly been declared obsolete and off-limits, so all cases on ARM
                 * get converted to ?: */

                if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
                {
                    /* We convert it to "(CMP_TRUE) ? (1):(0)" */

                    op1 = tree;
                    op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
                    op1->gtRequestSetFlags();

                    op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
                    op2 = fgMorphTree(op2);

                    tree = gtNewQmarkNode(TYP_INT, op1, op2);

                    fgMorphTreeDone(tree);

                    return tree;
                }
                break;

            case GT_QMARK:

                /* If op1 is a comma throw node then we won't be keeping op2 */
                if (fgIsCommaThrow(op1))
                {
                    break;
                }

                /* Get hold of the two branches */

                noway_assert(op2->OperGet() == GT_COLON);
                elseNode = op2->AsColon()->ElseNode();
                thenNode = op2->AsColon()->ThenNode();

                /* Try to hoist assignments out of qmark colon constructs.
                   ie. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */

                if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
                    thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
                    thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
                {
                    noway_assert(thenNode->TypeGet() == elseNode->TypeGet());

                    GenTreePtr asg    = thenNode;
                    GenTreePtr colon  = op2;
                    colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
                    colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
                    tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
                    asg->gtOp.gtOp2              = tree;

                    // Asg will have all the flags that the QMARK had
                    asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);

                    // Colon flag won't have the flags that x had.
                    colon->gtFlags &= ~GTF_ALL_EFFECT;
                    colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;

                    DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
                    DEBUG_DESTROY_NODE(elseNode);

                    return asg;
                }

                /* If the 'else' branch is empty swap the two branches and reverse the condition */

                if (elseNode->IsNothingNode())
                {
                    /* This can only happen for VOID ?: */
                    noway_assert(op2->gtType == TYP_VOID);

                    /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
                    if (thenNode->IsNothingNode())
                    {
                        // We may be able to throw away op1 (unless it has side-effects)

                        if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
                        {
                            /* Just return a Nop Node */
                            return thenNode;
                        }
                        else
                        {
                            /* Just return the relop, but clear the special flags.  Note
                               that we can't do that for longs and floats (see code under
                               COMPARE label above) */

                            if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
                            {
                                op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
                                return op1;
                            }
                        }
                    }
                    else
                    {
                        GenTreePtr tmp = elseNode;

                        op2->AsColon()->ElseNode() = elseNode = thenNode;
                        op2->AsColon()->ThenNode() = thenNode = tmp;
                        gtReverseCond(op1);
                    }
                }

#if !defined(_TARGET_ARM_)
                // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
                //
                // Don't do this optimization for ARM: we always require assignment
                // to boolean to remain ?:, since we don't have any way to generate
                // this with straight-line code, like x86 does using setcc (at least
                // after the IT instruction is deprecated).

                if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
                    thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
                {
                    ival1 = thenNode->gtIntCon.gtIconVal;
                    ival2 = elseNode->gtIntCon.gtIconVal;

                    // Is one constant 0 and the other 1?
                    if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
                    {
                        // If the constants are {1, 0}, reverse the condition
                        if (ival1 == 1)
                        {
                            gtReverseCond(op1);
                        }

                        // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
                        // needs to materialize the result as a 0 or 1.
                        noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
                        op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);

                        DEBUG_DESTROY_NODE(tree);
                        DEBUG_DESTROY_NODE(op2);

                        return op1;
                    }
                }
#endif // !_TARGET_ARM_

                break; // end case GT_QMARK

            case GT_MUL:

#ifndef _TARGET_64BIT_
                if (typ == TYP_LONG)
                {
                    // This must be GTF_MUL_64RSLT
                    assert(tree->gtIsValid64RsltMul());
                    return tree;
                }
#endif // _TARGET_64BIT_
                goto CM_OVF_OP;

            case GT_SUB:

                if (tree->gtOverflow())
                {
                    goto CM_OVF_OP;
                }

                // TODO #4104: there are a lot of other places where
                // this condition is not checked before transformations.
                if (fgGlobalMorph)
                {
                    /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */

                    noway_assert(op2);
                    if (op2->IsCnsIntOrI())
                    {
                        /* Negate the constant and change the node to be "+" */

                        op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
                        oper = GT_ADD;
                        tree->ChangeOper(oper);
                        goto CM_ADD_OP;
                    }

                    /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */

                    noway_assert(op1);
                    if (op1->IsCnsIntOrI())
                    {
                        noway_assert(varTypeIsIntOrI(tree));

                        tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
                                                                                           // node should be the same
                        // as the type of the tree, i.e. tree->gtType.
                        fgMorphTreeDone(op2);

                        oper = GT_ADD;
                        tree->ChangeOper(oper);
                        goto CM_ADD_OP;
                    }

                    /* No match - exit */
                }
                break;

#ifdef _TARGET_ARM64_
            case GT_DIV:
                if (!varTypeIsFloating(tree->gtType))
                {
                    // Codegen for this instruction needs to be able to throw two exceptions:
                    fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
                    fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
                }
                break;
            case GT_UDIV:
                // Codegen for this instruction needs to be able to throw one exception:
                fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
                break;
#endif

            case GT_ADD:

            CM_OVF_OP:
                if (tree->gtOverflow())
                {
                    tree->gtRequestSetFlags();

                    // Add the excptn-throwing basic block to jump to on overflow

                    fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);

                    // We can't do any commutative morphing for overflow instructions

                    break;
                }

            CM_ADD_OP:

            case GT_OR:
            case GT_XOR:
            case GT_AND:

                /* Commute any non-REF constants to the right */

                noway_assert(op1);
                if (op1->OperIsConst() && (op1->gtType != TYP_REF))
                {
                    // TODO-Review: We used to assert here that
                    // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
                    // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
                    // and would sometimes hit this assertion.  This may indicate a missed "remorph".
                    // Task is to re-enable this assertion and investigate.

                    /* Swap the operands */
                    tree->gtOp.gtOp1 = op2;
                    tree->gtOp.gtOp2 = op1;

                    op1 = op2;
                    op2 = tree->gtOp.gtOp2;
                }

                /* See if we can fold GT_ADD nodes. */

                if (oper == GT_ADD)
                {
                    /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */

                    if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
                        op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
                        !op1->gtOverflow() && !op2->gtOverflow())
                    {
                        cns1 = op1->gtOp.gtOp2;
                        cns2 = op2->gtOp.gtOp2;
                        cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
#ifdef _TARGET_64BIT_
                        if (cns1->TypeGet() == TYP_INT)
                        {
                            // we need to properly re-sign-extend or truncate after adding two int constants above
                            cns1->AsIntCon()->TruncateOrSignExtend32();
                        }
#endif //_TARGET_64BIT_

                        tree->gtOp.gtOp2 = cns1;
                        DEBUG_DESTROY_NODE(cns2);

                        op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
                        op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
                        DEBUG_DESTROY_NODE(op2);
                        op2 = tree->gtOp.gtOp2;
                    }

                    if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
                    {
                        /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */

                        if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
                            !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
                        {
                            cns1 = op1->gtOp.gtOp2;
                            op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
                                                             op2->gtIntConCommon.IconValue());
#ifdef _TARGET_64BIT_
                            if (op2->TypeGet() == TYP_INT)
                            {
                                // we need to properly re-sign-extend or truncate after adding two int constants above
                                op2->AsIntCon()->TruncateOrSignExtend32();
                            }
#endif //_TARGET_64BIT_

                            if (cns1->OperGet() == GT_CNS_INT)
                            {
                                op2->gtIntCon.gtFieldSeq =
                                    GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
                            }
                            DEBUG_DESTROY_NODE(cns1);

                            tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
                            DEBUG_DESTROY_NODE(op1);
                            op1 = tree->gtOp.gtOp1;
                        }

                        // Fold (x + 0).

                        if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
                        {

                            // If this addition is adding an offset to a null pointer,
                            // avoid the work and yield the null pointer immediately.
                            // Dereferencing the pointer in either case will have the
                            // same effect.

                            if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
                                ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
                            {
                                op2->gtType = tree->gtType;
                                DEBUG_DESTROY_NODE(op1);
                                DEBUG_DESTROY_NODE(tree);
                                return op2;
                            }

                            // Remove the addition iff it won't change the tree type
                            // to TYP_REF.

                            if (!gtIsActiveCSE_Candidate(op2) &&
                                ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
                            {
                                if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
                                    (op2->gtIntCon.gtFieldSeq != nullptr) &&
                                    (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
                                {
                                    fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
                                }

                                DEBUG_DESTROY_NODE(op2);
                                DEBUG_DESTROY_NODE(tree);

                                return op1;
                            }
                        }
                    }
                }
                /* See if we can fold GT_MUL by const nodes */
                else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
                {
#ifndef _TARGET_64BIT_
                    noway_assert(typ <= TYP_UINT);
#endif // _TARGET_64BIT_
                    noway_assert(!tree->gtOverflow());

                    ssize_t mult            = op2->gtIntConCommon.IconValue();
                    bool    op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
                                           op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();

                    assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);

                    if (mult == 0)
                    {
                        // We may be able to throw away op1 (unless it has side-effects)

                        if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
                        {
                            DEBUG_DESTROY_NODE(op1);
                            DEBUG_DESTROY_NODE(tree);
                            return op2; // Just return the "0" node
                        }

                        // We need to keep op1 for the side-effects. Hang it off
                        // a GT_COMMA node

                        tree->ChangeOper(GT_COMMA);
                        return tree;
                    }

                    size_t abs_mult      = (mult >= 0) ? mult : -mult;
                    size_t lowestBit     = genFindLowestBit(abs_mult);
                    bool   changeToShift = false;

                    // is it a power of two? (positive or negative)
                    if (abs_mult == lowestBit)
                    {
                        // if negative negate (min-int does not need negation)
                        if (mult < 0 && mult != SSIZE_T_MIN)
                        {
                            tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
                            fgMorphTreeDone(op1);
                        }

                        // If "op2" is a constant array index, the other multiplicand must be a constant.
                        // Transfer the annotation to the other one.
                        if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
                            op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
                        {
                            assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
                            GenTreePtr otherOp = op1;
                            if (otherOp->OperGet() == GT_NEG)
                            {
                                otherOp = otherOp->gtOp.gtOp1;
                            }
                            assert(otherOp->OperGet() == GT_CNS_INT);
                            assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
                            otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
                        }

                        if (abs_mult == 1)
                        {
                            DEBUG_DESTROY_NODE(op2);
                            DEBUG_DESTROY_NODE(tree);
                            return op1;
                        }

                        /* Change the multiplication into a shift by log2(val) bits */
                        op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
                        changeToShift = true;
                    }
#if LEA_AVAILABLE
                    else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
                    {
                        int     shift  = genLog2(lowestBit);
                        ssize_t factor = abs_mult >> shift;

                        if (factor == 3 || factor == 5 || factor == 9)
                        {
                            // if negative negate (min-int does not need negation)
                            if (mult < 0 && mult != SSIZE_T_MIN)
                            {
                                tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
                                fgMorphTreeDone(op1);
                            }

                            GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
                            if (op2IsConstIndex)
                            {
                                factorIcon->AsIntCon()->gtFieldSeq =
                                    GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
                            }

                            // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
                            tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
                            fgMorphTreeDone(op1);

                            op2->gtIntConCommon.SetIconValue(shift);
                            changeToShift = true;
                        }
                    }
#endif // LEA_AVAILABLE
                    if (changeToShift)
                    {
                        // vnStore is null before the ValueNumber phase has run
                        if (vnStore != nullptr)
                        {
                            // Update the ValueNumber for 'op2', as we just changed the constant
                            fgValueNumberTreeConst(op2);
                        }
                        oper = GT_LSH;
                        // Keep the old ValueNumber for 'tree' as the new expr
                        // will still compute the same value as before
                        tree->ChangeOper(oper, GenTree::PRESERVE_VN);

                        goto DONE_MORPHING_CHILDREN;
                    }
                }
                else if (fgOperIsBitwiseRotationRoot(oper))
                {
                    tree = fgRecognizeAndMorphBitwiseRotation(tree);

                    // fgRecognizeAndMorphBitwiseRotation may return a new tree
                    oper = tree->OperGet();
                    typ  = tree->TypeGet();
                    op1  = tree->gtOp.gtOp1;
                    op2  = tree->gtOp.gtOp2;
                }

                break;

            case GT_CHS:
            case GT_NOT:
            case GT_NEG:

                /* Any constant cases should have been folded earlier */
                noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
                break;

            case GT_CKFINITE:

                noway_assert(varTypeIsFloating(op1->TypeGet()));

                fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
                break;

            case GT_OBJ:
                // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
                // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
                // is a local or clsVar, even if it has been address-exposed.
                if (op1->OperGet() == GT_ADDR)
                {
                    tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
                }
                break;

            case GT_IND:

                // Can not remove a GT_IND if it is currently a CSE candidate.
                if (gtIsActiveCSE_Candidate(tree))
                {
                    break;
                }

                bool foldAndReturnTemp;
                foldAndReturnTemp = false;
                temp              = nullptr;
                ival1             = 0;

                /* Try to Fold *(&X) into X */
                if (op1->gtOper == GT_ADDR)
                {
                    // Can not remove a GT_ADDR if it is currently a CSE candidate.
                    if (gtIsActiveCSE_Candidate(op1))
                    {
                        break;
                    }

                    temp = op1->gtOp.gtOp1; // X

                    // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
                    // they are the *same* struct type.  In fact, they almost certainly aren't.  If the
                    // address has an associated field sequence, that identifies this case; go through
                    // the "lcl_fld" path rather than this one.
                    FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
                    if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
                    {
                        foldAndReturnTemp = true;
                    }
                    else if (temp->OperIsLocal())
                    {
                        unsigned   lclNum = temp->gtLclVarCommon.gtLclNum;
                        LclVarDsc* varDsc = &lvaTable[lclNum];

                        // We will try to optimize when we have a promoted struct with a zero lvFldOffset
                        if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
                        {
                            noway_assert(varTypeIsStruct(varDsc));

                            // We will try to optimize when we have a single field struct that is being struct promoted
                            if (varDsc->lvFieldCnt == 1)
                            {
                                unsigned lclNumFld = varDsc->lvFieldLclStart;
                                // just grab the promoted field
                                LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];

                                // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset
                                // is zero
                                if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
                                {
                                    // We can just use the existing promoted field LclNum
                                    temp->gtLclVarCommon.SetLclNum(lclNumFld);
                                    temp->gtType = fieldVarDsc->TypeGet();

                                    foldAndReturnTemp = true;
                                }
                            }
                        }
                        // If the type of the IND (typ) is a "small int", and the type of the local has the
                        // same width, then we can reduce to just the local variable -- it will be
                        // correctly normalized, and signed/unsigned differences won't matter.
                        //
                        // The below transformation cannot be applied if the local var needs to be normalized on load.
                        else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
                                 !lvaTable[lclNum].lvNormalizeOnLoad())
                        {
                            tree->gtType = typ = temp->TypeGet();
                            foldAndReturnTemp  = true;
                        }
                        else
                        {
                            // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
                            // nullptr)
                            assert(fieldSeq == nullptr);
                            bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
                            assert(b || fieldSeq == nullptr);

                            if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
                            {
                                // Append the field sequence, change the type.
                                temp->AsLclFld()->gtFieldSeq =
                                    GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
                                temp->gtType = typ;

                                foldAndReturnTemp = true;
                            }
                        }
                        // Otherwise we will fold this into a GT_LCL_FLD below
                        //   where we check (temp != nullptr)
                    }
                    else // !temp->OperIsLocal()
                    {
                        // We don't try to fold away the GT_IND/GT_ADDR for this case
                        temp = nullptr;
                    }
                }
                else if (op1->OperGet() == GT_ADD)
                {
                    /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */

                    if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
                        (!(opts.MinOpts() || opts.compDbgCode)))
                    {
                        // No overflow arithmetic with pointers
                        noway_assert(!op1->gtOverflow());

                        temp = op1->gtOp.gtOp1->gtOp.gtOp1;
                        if (!temp->OperIsLocal())
                        {
                            temp = nullptr;
                            break;
                        }

                        // Can not remove the GT_ADDR if it is currently a CSE candidate.
                        if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
                        {
                            break;
                        }

                        ival1    = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
                        fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;

                        // Does the address have an associated zero-offset field sequence?
                        FieldSeqNode* addrFieldSeq = nullptr;
                        if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
                        {
                            fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
                        }

                        if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
                        {
                            noway_assert(!varTypeIsGC(temp->TypeGet()));
                            foldAndReturnTemp = true;
                        }
                        else
                        {
                            // The emitter can't handle large offsets
                            if (ival1 != (unsigned short)ival1)
                            {
                                break;
                            }

                            // The emitter can get confused by invalid offsets
                            if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
                            {
                                break;
                            }

#ifdef _TARGET_ARM_
                            // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
                            //
                            if (varTypeIsFloating(typ))
                            {
                                if ((ival1 % emitTypeSize(typ)) != 0)
                                {
                                    tree->gtFlags |= GTF_IND_UNALIGNED;
                                    break;
                                }
                            }
#endif
                        }
                        // Now we can fold this into a GT_LCL_FLD below
                        //   where we check (temp != nullptr)
                    }
                }

                // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
                // - We may have a load of a local where the load has a different type than the local
                // - We may have a load of a local plus an offset
                //
                // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
                // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
                // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
                // out-of-bounds w.r.t. the local).
                if ((temp != nullptr) && !foldAndReturnTemp)
                {
                    assert(temp->OperIsLocal());

                    const unsigned   lclNum = temp->AsLclVarCommon()->gtLclNum;
                    LclVarDsc* const varDsc = &lvaTable[lclNum];

                    const var_types tempTyp = temp->TypeGet();
                    const bool      useExactSize =
                        varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
                    const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);

                    // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
                    // a lclFld: the access represented by an lclFld node must begin at or after the start of the
                    // lclVar and must not extend beyond the end of the lclVar.
                    if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize))
                    {
                        lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
                    }
                    else
                    {
                        // Make sure we don't separately promote the fields of this struct.
                        if (varDsc->lvRegStruct)
                        {
                            // We can enregister, but can't promote.
                            varDsc->lvPromoted = false;
                        }
                        else
                        {
                            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
                        }

                        // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
                        // or if we already have a GT_LCL_FLD we will adjust the gtLclOffs by adding 'ival1'.
                        // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
                        //
                        if (temp->OperGet() == GT_LCL_FLD)
                        {
                            temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
                            temp->AsLclFld()->gtFieldSeq =
                                GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
                        }
                        else
                        {
                            temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
                            temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
                            if (fieldSeq != nullptr)
                            { // If it does represent a field, note that.
                                temp->AsLclFld()->gtFieldSeq = fieldSeq;
                            }
                        }
                        temp->gtType      = tree->gtType;
                        foldAndReturnTemp = true;
                    }
                }

                if (foldAndReturnTemp)
                {
                    assert(temp != nullptr);
                    assert(temp->TypeGet() == typ);
                    assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));

                    // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
                    // 'temp' because a GT_ADDR always marks it for its operand.
                    temp->gtFlags &= ~GTF_DONT_CSE;
                    temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);

                    if (op1->OperGet() == GT_ADD)
                    {
                        DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
                        DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
                    }
                    DEBUG_DESTROY_NODE(op1);  // GT_ADD or GT_ADDR
                    DEBUG_DESTROY_NODE(tree); // GT_IND

                    // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
                    // normalization.
                    if (temp->OperIs(GT_LCL_VAR))
                    {
#ifdef DEBUG
                        // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
                        // and the node in question must have this bit set (as it has already been morphed).
                        temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
                        const bool forceRemorph = true;
                        temp                    = fgMorphLocalVar(temp, forceRemorph);
#ifdef DEBUG
                        // We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
                        // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
                        // returns.
                        temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
                    }

                    return temp;
                }

                // Only do this optimization when we are in the global optimizer. Doing this after value numbering
                // could result in an invalid value number for the newly generated GT_IND node.
                if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
                {
                    // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
                    // TBD: this transformation is currently necessary for correctness -- it might
                    // be good to analyze the failures that result if we don't do this, and fix them
                    // in other ways.  Ideally, this should be optional.
                    GenTreePtr commaNode = op1;
                    unsigned   treeFlags = tree->gtFlags;
                    commaNode->gtType    = typ;
                    commaNode->gtFlags   = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
                                                                           // dangerous, clear the GTF_REVERSE_OPS at
                                                                           // least.
#ifdef DEBUG
                    commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
                    {
                        commaNode          = commaNode->gtOp.gtOp2;
                        commaNode->gtType  = typ;
                        commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
                                                                             // dangerous, clear the GTF_REVERSE_OPS at
                                                                             // least.
#ifdef DEBUG
                        commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    }
                    bool      wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
                    ArrayInfo arrInfo;
                    if (wasArrIndex)
                    {
                        bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
                        assert(b);
                        GetArrayInfoMap()->Remove(tree);
                    }
                    tree         = op1;
                    op1          = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
                    op1->gtFlags = treeFlags;
                    if (wasArrIndex)
                    {
                        GetArrayInfoMap()->Set(op1, arrInfo);
                    }
#ifdef DEBUG
                    op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                    commaNode->gtOp.gtOp2 = op1;
                    return tree;
                }

                break;

            case GT_ADDR:

                // Can not remove op1 if it is currently a CSE candidate.
                if (gtIsActiveCSE_Candidate(op1))
                {
                    break;
                }

                if (op1->OperGet() == GT_IND)
                {
                    if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
                    {
                        // Can not remove a GT_ADDR if it is currently a CSE candidate.
                        if (gtIsActiveCSE_Candidate(tree))
                        {
                            break;
                        }

                        // Perform the transform ADDR(IND(...)) == (...).
                        GenTreePtr addr = op1->gtOp.gtOp1;

                        noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);

                        DEBUG_DESTROY_NODE(op1);
                        DEBUG_DESTROY_NODE(tree);

                        return addr;
                    }
                }
                else if (op1->OperGet() == GT_OBJ)
                {
                    // Can not remove a GT_ADDR if it is currently a CSE candidate.
                    if (gtIsActiveCSE_Candidate(tree))
                    {
                        break;
                    }

                    // Perform the transform ADDR(OBJ(...)) == (...).
                    GenTreePtr addr = op1->AsObj()->Addr();

                    noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);

                    DEBUG_DESTROY_NODE(op1);
                    DEBUG_DESTROY_NODE(tree);

                    return addr;
                }
                else if (op1->gtOper == GT_CAST)
                {
                    GenTreePtr casting = op1->gtCast.CastOp();
                    if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
                    {
                        DEBUG_DESTROY_NODE(op1);
                        tree->gtOp.gtOp1 = op1 = casting;
                    }
                }
                else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
                {
                    // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
                    // (Be sure to mark "z" as an l-value...)
                    GenTreePtr commaNode = op1;
                    while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
                    {
                        commaNode = commaNode->gtOp.gtOp2;
                    }
                    // The top-level addr might be annotated with a zeroOffset field.
                    FieldSeqNode* zeroFieldSeq = nullptr;
                    bool          isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
                    tree                       = op1;
                    commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;

                    // If the node we're about to put under a GT_ADDR is an indirection, it
                    // doesn't need to be materialized, since we only want the addressing mode. Because
                    // of this, this GT_IND is not a faulting indirection and we don't have to extract it
                    // as a side effect.
                    GenTree* commaOp2 = commaNode->gtOp.gtOp2;
                    if (commaOp2->OperIsBlk())
                    {
                        commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
                    }
                    if (commaOp2->gtOper == GT_IND)
                    {
                        commaOp2->gtFlags |= GTF_IND_NONFAULTING;
                    }

                    op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);

                    if (isZeroOffset)
                    {
                        // Transfer the annotation to the new GT_ADDR node.
                        GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
                    }
                    commaNode->gtOp.gtOp2 = op1;
                    // Originally, I gave all the comma nodes type "byref".  But the ADDR(IND(x)) == x transform
                    // might give op1 a type different from byref (like, say, native int).  So now go back and give
                    // all the comma nodes the type of op1.
                    // TODO: the comma flag update below is conservative and can be improved.
                    // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
                    // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
                    commaNode = tree;
                    while (commaNode->gtOper == GT_COMMA)
                    {
                        commaNode->gtType = op1->gtType;
                        commaNode->gtFlags |= op1->gtFlags;
#ifdef DEBUG
                        commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
                        commaNode = commaNode->gtOp.gtOp2;
                    }

                    return tree;
                }

                /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
                op1->gtFlags |= GTF_DONT_CSE;
                break;

            case GT_COLON:
                if (fgGlobalMorph)
                {
                    /* Mark the nodes that are conditionally executed */
                    fgWalkTreePre(&tree, gtMarkColonCond);
                }
                /* Since we're doing this postorder we clear this if it got set by a child */
                fgRemoveRestOfBlock = false;
                break;

            case GT_COMMA:

                /* Special case: trees that don't produce a value */
                if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
                    fgIsThrow(op2))
                {
                    typ = tree->gtType = TYP_VOID;
                }

                // If we are in the Valuenum CSE phase then don't morph away anything as these
                // nodes may have CSE defs/uses in them.
                //
                if (!optValnumCSE_phase)
                {
                    // Extract the side effects from the left side of the comma.  Since they don't "go" anywhere, this
                    // is all we need.

                    GenTreePtr op1SideEffects = nullptr;
                    // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
                    // hoisted expressions in loops.
                    gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
                    if (op1SideEffects)
                    {
                        // Replace the left hand side with the side effect list.
                        tree->gtOp.gtOp1 = op1SideEffects;
                        tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
                    }
                    else
                    {
                        /* The left operand is worthless, throw it away */
                        if (lvaLocalVarRefCounted)
                        {
                            lvaRecursiveDecRefCounts(op1);
                        }
                        op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
                        DEBUG_DESTROY_NODE(tree);
                        DEBUG_DESTROY_NODE(op1);
                        return op2;
                    }

                    /* If the right operand is just a void nop node, throw it away */
                    if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
                    {
                        op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
                        DEBUG_DESTROY_NODE(tree);
                        DEBUG_DESTROY_NODE(op2);
                        return op1;
                    }
                }

                break;

            case GT_JTRUE:

                /* Special case if fgRemoveRestOfBlock is set to true */
                if (fgRemoveRestOfBlock)
                {
                    if (fgIsCommaThrow(op1, true))
                    {
                        GenTreePtr throwNode = op1->gtOp.gtOp1;
                        noway_assert(throwNode->gtType == TYP_VOID);

                        return throwNode;
                    }

                    noway_assert(op1->OperKind() & GTK_RELOP);
                    noway_assert(op1->gtFlags & GTF_EXCEPT);

                    // We need to keep op1 for the side-effects. Hang it off
                    // a GT_COMMA node

                    tree->ChangeOper(GT_COMMA);
                    tree->gtOp.gtOp2 = op2 = gtNewNothingNode();

                    // Additionally since we're eliminating the JTRUE
                    // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
                    // So we change it into a GT_COMMA as well.
                    op1->ChangeOper(GT_COMMA);
                    op1->gtType = op1->gtOp.gtOp1->gtType;

                    return tree;
                }

            default:
                break;
        }

        noway_assert(oper == tree->gtOper);

        // If we are in the Valuenum CSE phase then don't morph away anything as these
        // nodes may have CSE defs/uses in them.
        //
        if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
        {
            /* Check for op1 as a GT_COMMA with an unconditional throw node */
            if (op1 && fgIsCommaThrow(op1, true))
            {
                if ((op1->gtFlags & GTF_COLON_COND) == 0)
                {
                    /* We can safely throw out the rest of the statements */
                    fgRemoveRestOfBlock = true;
                }

                GenTreePtr throwNode = op1->gtOp.gtOp1;
                noway_assert(throwNode->gtType == TYP_VOID);

                if (oper == GT_COMMA)
                {
                    /* Both tree and op1 are GT_COMMA nodes */
                    /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
                    tree->gtOp.gtOp1 = throwNode;
                    return tree;
                }
                else if (oper != GT_NOP)
                {
                    if (genActualType(typ) == genActualType(op1->gtType))
                    {
                        /* The types match so, return the comma throw node as the new tree */
                        return op1;
                    }
                    else
                    {
                        if (typ == TYP_VOID)
                        {
                            // Return the throw node
                            return throwNode;
                        }
                        else
                        {
                            GenTreePtr commaOp2 = op1->gtOp.gtOp2;

                            // need type of oper to be same as tree
                            if (typ == TYP_LONG)
                            {
                                commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                                commaOp2->gtIntConCommon.SetLngValue(0);
                                /* Change the types of op1 and commaOp2 to TYP_LONG */
                                op1->gtType = commaOp2->gtType = TYP_LONG;
                            }
                            else if (varTypeIsFloating(typ))
                            {
                                commaOp2->ChangeOperConst(GT_CNS_DBL);
                                commaOp2->gtDblCon.gtDconVal = 0.0;
                                /* Change the types of oper and commaOp2 to TYP_DOUBLE */
                                op1->gtType = commaOp2->gtType = TYP_DOUBLE;
                            }
                            else
                            {
                                commaOp2->ChangeOperConst(GT_CNS_INT);
                                commaOp2->gtIntConCommon.SetIconValue(0);
                                /* Change the types of oper and commaOp2 to TYP_INT */
                                op1->gtType = commaOp2->gtType = TYP_INT;
                            }

                            /* Return the GT_COMMA node as the new tree */
                            return op1;
                        }
                    }
                }
            }

            /* Check for op2 as a GT_COMMA with an unconditional throw */

            if (op2 && fgIsCommaThrow(op2, true))
            {
                if ((op2->gtFlags & GTF_COLON_COND) == 0)
                {
                    /* We can safely throw out the rest of the statements */
                    fgRemoveRestOfBlock = true;
                }

                // If op1 has no side-effects
                if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
                {
                    // If tree is an asg node
                    if (tree->OperIsAssignment())
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    // If tree is a comma node
                    if (tree->OperGet() == GT_COMMA)
                    {
                        /* Return the throw node as the new tree */
                        return op2->gtOp.gtOp1;
                    }

                    /* for the shift nodes the type of op2 can differ from the tree type */
                    if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
                    {
                        noway_assert(GenTree::OperIsShiftOrRotate(oper));

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
                        commaOp2->gtIntConCommon.SetLngValue(0);

                        /* Change the types of oper and commaOp2 to TYP_LONG */
                        op2->gtType = commaOp2->gtType = TYP_LONG;
                    }

                    if ((genActualType(typ) == TYP_INT) &&
                        (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
                    {
                        // An example case is comparison (say GT_GT) of two longs or floating point values.

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_INT */
                        op2->gtType = commaOp2->gtType = TYP_INT;
                    }

                    if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
                    {
                        noway_assert(tree->OperGet() == GT_ADD);

                        GenTreePtr commaOp2 = op2->gtOp.gtOp2;

                        commaOp2->ChangeOperConst(GT_CNS_INT);
                        commaOp2->gtIntCon.gtIconVal = 0;
                        /* Change the types of oper and commaOp2 to TYP_BYREF */
                        op2->gtType = commaOp2->gtType = TYP_BYREF;
                    }

                    /* types should now match */
                    noway_assert((genActualType(typ) == genActualType(op2->gtType)));

                    /* Return the GT_COMMA node as the new tree */
                    return op2;
                }
            }
        }

        /*-------------------------------------------------------------------------
         * Optional morphing is done if tree transformations is permitted
         */

        if ((opts.compFlags & CLFLG_TREETRANS) == 0)
        {
            return tree;
        }

        tree = fgMorphSmpOpOptional(tree->AsOp());

    } // extra scope for gcc workaround
    return tree;
}
#ifdef _PREFAST_
#pragma warning(pop)
#endif

//------------------------------------------------------------------------
// fgMorphSmpOpOptional: Apply the optional, optimization-only rewrites to a
//    simple operator node.
//
// Arguments:
//    tree - The simple operator node to optimize.
//
// Return Value:
//    The node that should take the place of 'tree'. This is usually 'tree'
//    itself (possibly with its oper and/or operands rewritten in place), but
//    it can also be one of its operands (e.g. "val / 1" returns "val") or
//    whatever fgMorphCopyBlock/fgMorphInitBlock return for struct assignments.
//
// Notes:
//    The rewrites are attempted in this order:
//      - for commutative operators during global morph: apply a pending
//        GTF_REVERSE_OPS by physically swapping the operands, and make nested
//        same-oper integer expressions left-recursive;
//      - "((x+icon)+y)" => "((x+y)+icon)" (only when REARRANGE_ADDS is enabled);
//      - oper-specific rewrites: "a = a <op> x" => "a <op>= x", distributing a
//        constant multiply/shift over "(val + icon)", "val / 1" => "val",
//        "x ^ -1" => "~x", "relop ^ 1" => reversed relop, and a zero GT_INIT_VAL
//        => the bare zero constant.
//
GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
{
    genTreeOps oper = tree->gtOper;
    GenTree*   op1  = tree->gtOp1;
    GenTree*   op2  = tree->gtOp2;
    var_types  typ  = tree->TypeGet();

    if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
    {
        /* Swap the operands so that the more expensive one is 'op1' */

        // If the node is marked for reverse evaluation, swap the operands physically
        // and drop the flag; the local op1/op2 copies are kept in sync with the node.
        if (tree->gtFlags & GTF_REVERSE_OPS)
        {
            tree->gtOp1 = op2;
            tree->gtOp2 = op1;

            op2 = op1;
            op1 = tree->gtOp1;

            tree->gtFlags &= ~GTF_REVERSE_OPS;
        }

        if (oper == op2->gtOper)
        {
            /*  Reorder nested operators at the same precedence level to be
                left-recursive. For example, change "(a+(b+c))" to the
                equivalent expression "((a+b)+c)".
             */

            /* Things are handled differently for floating-point operators */

            if (!varTypeIsFloating(tree->TypeGet()))
            {
                fgMoveOpsLeft(tree);

                // fgMoveOpsLeft may have changed the operands; reload them.
                op1 = tree->gtOp1;
                op2 = tree->gtOp2;
            }
        }
    }

#if REARRANGE_ADDS

    /* Change "((x+icon)+y)" to "((x+y)+icon)"
       Don't reorder floating-point operations */

    if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
        varTypeIsIntegralOrI(typ))
    {
        GenTreePtr ad2 = op1->gtOp.gtOp2;

        if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
        {
            // The shape here is:
            //
            //     tree = ADD(op1, op2)     where     op1 = ADD(x, ad2)
            //
            // with (ad2) a constant and (op2) a non-constant. We swap (ad2) and (op2), giving
            //
            //     tree = ADD(op1, ad2)     where     op1 = ADD(x, op2)
            //
            // If (op2) is varTypeIsGC, then this implies that (tree) is varTypeIsGC. If (op1) is
            // not, then after the swap we would have a TYP_INT node (op1) with a child that is
            // varTypeIsGC. If we encounter that situation, make (op1) the same type as (tree).
            //
            // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
            // necessary

            if (varTypeIsGC(op2->TypeGet()))
            {
                noway_assert(varTypeIsGC(typ));
                op1->gtType = typ;
            }
            tree->gtOp2 = ad2;

            op1->gtOp.gtOp2 = op2;

            // (op1) now contains (op2), so it inherits (op2)'s side-effect flags.
            op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;

            op2 = tree->gtOp2;
        }
    }

#endif

    /*-------------------------------------------------------------------------
     * Perform optional oper-specific postorder morphing
     */

    switch (oper)
    {
        genTreeOps cmop;
        bool       dstIsSafeLclVar;

        case GT_ASG:
            /* We'll convert "a = a <op> x" into "a <op>= x"                     */
            /*     and also  "a = x <op> a" into "a <op>= x" for commutative ops */
            CLANG_FORMAT_COMMENT_ANCHOR;

            if (typ == TYP_LONG)
            {
                break;
            }

            // Struct assignments (other than phi definitions) are handed to block morphing.
            if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
            {
                if (tree->OperIsCopyBlkOp())
                {
                    return fgMorphCopyBlock(tree);
                }
                else
                {
                    return fgMorphInitBlock(tree);
                }
            }

            /* Make sure we're allowed to do this */

            if (optValnumCSE_phase)
            {
                // It is not safe to reorder/delete CSE's
                break;
            }

            /* Are we assigning to a GT_LCL_VAR ? */

            dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);

            /* If we have a GT_LCL_VAR, then is the address taken? */
            if (dstIsSafeLclVar)
            {
                unsigned   lclNum = op1->gtLclVarCommon.gtLclNum;
                LclVarDsc* varDsc = lvaTable + lclNum;

                noway_assert(lclNum < lvaCount);

                /* Is the address taken? */
                if (varDsc->lvAddrExposed)
                {
                    dstIsSafeLclVar = false;
                }
                else if (op2->gtFlags & GTF_ASG)
                {
                    // The RHS contains an assignment of its own; leave the tree alone.
                    break;
                }
            }

            if (!dstIsSafeLclVar)
            {
                if (op2->gtFlags & GTF_ASG)
                {
                    break;
                }

                if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
                {
                    break;
                }
            }

            /* Special case: a cast that can be thrown away */

            if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
            {
                var_types srct;
                var_types cast;
                var_types dstt;

                srct = op2->gtCast.CastOp()->TypeGet();
                cast = (var_types)op2->CastToType();
                dstt = op1->TypeGet();

                /* Make sure these are all ints and precision is not lost */

                if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
                {
                    // Store the cast's operand directly; the cast itself is dropped.
                    op2 = tree->gtOp2 = op2->gtCast.CastOp();
                }
            }

            /* Make sure we have the operator range right */

            static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
            static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
            static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
            static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
            static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
            static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");

            static_assert(GT_OR == GT_ADD + 7, "bad oper value");
            static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
            static_assert(GT_AND == GT_ADD + 9, "bad oper value");

            static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
            static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
            static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");

            /* Check for a suitable operator on the RHS */

            cmop = op2->OperGet();

            switch (cmop)
            {
                case GT_NEG:
                    // GT_CHS only supported for integer types
                    if (varTypeIsFloating(tree->TypeGet()))
                    {
                        break;
                    }

                    goto ASG_OP;

                case GT_MUL:
                    // GT_ASG_MUL only supported for floating point types
                    if (!varTypeIsFloating(tree->TypeGet()))
                    {
                        break;
                    }

                    __fallthrough;

                case GT_ADD:
                case GT_SUB:
                    if (op2->gtOverflow())
                    {
                        /* Disable folding into "<op>=" if the result can be
                           visible to anyone as <op> may throw an exception and
                           the assignment should not proceed
                           We are safe with an assignment to a local variables
                         */
                        if (ehBlockHasExnFlowDsc(compCurBB))
                        {
                            break;
                        }
                        if (!dstIsSafeLclVar)
                        {
                            break;
                        }
                    }
#ifndef _TARGET_AMD64_
                    // This is hard for byte-operations as we need to make
                    // sure both operands are in RBM_BYTE_REGS.
                    if (varTypeIsByte(op2->TypeGet()))
                    {
                        break;
                    }
#endif // !_TARGET_AMD64_
                    goto ASG_OP;

                case GT_DIV:
                case GT_UDIV:
                    // GT_ASG_DIV only supported for floating point types
                    if (!varTypeIsFloating(tree->TypeGet()))
                    {
                        break;
                    }

                    __fallthrough;

                case GT_LSH:
                case GT_RSH:
                case GT_RSZ:
                case GT_OR:
                case GT_XOR:
                case GT_AND:
                ASG_OP:
                {
                    bool bReverse       = false;
                    bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
                    if (bAsgOpFoldable)
                    {
                        if (bReverse)
                        {
                            // We will transform this from "a = x <op> a" to "a <op>= x"
                            // so we can now destroy the duplicate "a"
                            DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
                            op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
                        }

                        /* Special case: "x |= -1" and "x &= 0" */
                        if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
                            ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
                        {
                            /* Simply change to an assignment */
                            tree->gtOp2 = op2->gtOp.gtOp2;
                            break;
                        }

                        if (cmop == GT_NEG)
                        {
                            /* This is "x = -x;", use the flipsign operator */

                            tree->ChangeOper(GT_CHS);

                            if (op1->gtOper == GT_LCL_VAR)
                            {
                                op1->gtFlags |= GTF_VAR_USEASG;
                            }

                            tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());

                            break;
                        }

                        if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
                        {
                            // Changing from x = x op y to x op= y when x is a small integer type
                            // makes the op size smaller (originally the op size was 32 bits, after
                            // sign or zero extension of x, and there is an implicit truncation in the
                            // assignment).
                            // This is ok in most cases because the upper bits were
                            // lost when assigning the op result to a small type var,
                            // but it may not be ok for the right shift operation where the higher bits
                            // could be shifted into the lower bits and preserved.
                            // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
                            // (sbyte)x >>signed y)) as do unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
                            // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
                            // wrong result:
                            // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
                            // but  (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
                            // The result becomes correct if we use >>unsigned instead of >>signed.
                            noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
                            cmop = GT_RSZ;
                        }

                        /* Replace with an assignment operator */
                        noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
                        noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
                        noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
                        noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
                        noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
                        noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
                        noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
                        noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);

                        // The binary opers and their "<op>=" forms are laid out in the same
                        // relative order, so the assignment oper is found by offsetting.
                        tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
                        tree->gtOp2 = op2->gtOp.gtOp2;

                        /* Propagate GTF_OVERFLOW */

                        if (op2->gtOverflowEx())
                        {
                            tree->gtType = op2->gtType;
                            tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
                        }

#if FEATURE_SET_FLAGS

                        /* Propagate GTF_SET_FLAGS */
                        if (op2->gtSetFlags())
                        {
                            tree->gtRequestSetFlags();
                        }

#endif // FEATURE_SET_FLAGS

                        DEBUG_DESTROY_NODE(op2);
                        op2 = tree->gtOp2;

                        /* The target is used as well as being defined */
                        if (op1->OperIsLocal())
                        {
                            op1->gtFlags &= ~GTF_VAR_USEDEF;
                            op1->gtFlags |= GTF_VAR_USEASG;
                        }

#if CPU_HAS_FP_SUPPORT
                        /* Check for the special cases "x += x * y;" and "x -= x * y;" */

                        // Only "+=" and "-=" can be turned into "*=" below
                        if (cmop != GT_ADD && cmop != GT_SUB)
                        {
                            break;
                        }

                        // GT_ASG_MUL is only supported for floating point types
                        if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
                        {
                            // Pick the multiplicand that is not "x"; give up if neither one is "x".
                            if (GenTree::Compare(op1, op2->gtOp.gtOp1))
                            {
                                /* Change "x += x * y" into "x *= (y + 1)" */

                                op2 = op2->gtOp.gtOp2;
                            }
                            else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
                            {
                                /* Change "x += y * x" into "x *= (y + 1)" */

                                op2 = op2->gtOp.gtOp1;
                            }
                            else
                            {
                                break;
                            }

                            // From here on 'op1' is the constant 1.0, no longer the assignment target.
                            op1 = gtNewDconNode(1.0);

                            /* Now make the "*=" node */

                            if (cmop == GT_ADD)
                            {
                                /* Change "x += x * y" into "x *= (y + 1)" */

                                tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
                            }
                            else
                            {
                                /* Change "x -= x * y" into "x *= (1 - y)" */

                                noway_assert(cmop == GT_SUB);
                                tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
                            }
                            tree->ChangeOper(GT_ASG_MUL);
                        }
#endif // CPU_HAS_FP_SUPPORT
                    }
                }

                break;

                case GT_NOT:

                    /* Is the destination identical to the first RHS sub-operand? */

                    if (GenTree::Compare(op1, op2->gtOp.gtOp1))
                    {
                        /* This is "x = ~x" which is the same as "x ^= -1"
                         * Transform the node into a GT_ASG_XOR */

                        noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);

                        op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);

                        cmop = GT_XOR;
                        goto ASG_OP;
                    }

                    break;
                default:
                    break;
            }

            break;

        case GT_MUL:

            /* Check for the case "(val + icon) * icon" */

            if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
            {
                GenTreePtr add = op1->gtOp.gtOp2;

                if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
                {
                    if (tree->gtOverflow() || op1->gtOverflow())
                    {
                        break;
                    }

                    ssize_t imul = op2->gtIntCon.gtIconVal;
                    ssize_t iadd = add->gtIntCon.gtIconVal;

                    /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */

                    oper = GT_ADD;
                    tree->ChangeOper(oper);

                    // 'op2' is reused as the "iadd * imul" constant of the new GT_ADD ...
                    op2->gtIntCon.gtIconVal = iadd * imul;

                    op1->ChangeOper(GT_MUL);

                    // ... and 'add' is reused as the multiplier of the new GT_MUL.
                    add->gtIntCon.gtIconVal = imul;
#ifdef _TARGET_64BIT_
                    // The product of two int constants may not fit in 32 bits, so the node that
                    // now holds the product (op2) must be re-sign-extended or truncated when it
                    // is a TYP_INT constant. This matches what the GT_LSH case below does.
                    if (op2->gtType == TYP_INT)
                    {
                        op2->AsIntCon()->TruncateOrSignExtend32();
                    }

                    // 'add' only received the original multiplier; keep normalizing it as well so
                    // that it is in canonical form for its own type.
                    if (add->gtType == TYP_INT)
                    {
                        add->AsIntCon()->TruncateOrSignExtend32();
                    }
#endif //_TARGET_64BIT_
                }
            }

            break;

        case GT_DIV:

            /* For "val / 1", just return "val" */

            if (op2->IsIntegralConst(1))
            {
                DEBUG_DESTROY_NODE(tree);
                return op1;
            }

            break;

        case GT_LSH:

            /* Check for the case "(val + icon) << icon" */

            if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
            {
                GenTreePtr cns = op1->gtOp.gtOp2;

                if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
                {
                    ssize_t ishf = op2->gtIntConCommon.IconValue();
                    ssize_t iadd = cns->gtIntConCommon.IconValue();

                    // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");

                    /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */

                    tree->ChangeOper(GT_ADD);
                    ssize_t result = iadd << ishf;
                    op2->gtIntConCommon.SetIconValue(result);
#ifdef _TARGET_64BIT_
                    if (op1->gtType == TYP_INT)
                    {
                        op2->AsIntCon()->TruncateOrSignExtend32();
                    }
#endif // _TARGET_64BIT_

                    // we are reusing the shift amount node here, but the type we want is that of the shift result
                    op2->gtType = op1->gtType;

                    // Carry a constant-index field sequence over from the old addend to the new one.
                    if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
                        cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
                    {
                        assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
                        op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
                    }

                    op1->ChangeOper(GT_LSH);

                    // The old addend node becomes the shift amount of the new GT_LSH.
                    cns->gtIntConCommon.SetIconValue(ishf);
                }
            }

            break;

        case GT_XOR:

            if (!optValnumCSE_phase)
            {
                /* "x ^ -1" is "~x" */

                if (op2->IsIntegralConst(-1))
                {
                    tree->ChangeOper(GT_NOT);
                    tree->gtOp2 = nullptr;
                    DEBUG_DESTROY_NODE(op2);
                }
                else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
                {
                    /* "binaryVal ^ 1" is "!binaryVal" */
                    gtReverseCond(op1);
                    DEBUG_DESTROY_NODE(op2);
                    DEBUG_DESTROY_NODE(tree);
                    return op1;
                }
            }

            break;

        case GT_INIT_VAL:
            // Initialization values for initBlk have special semantics - their lower
            // byte is used to fill the struct. However, we allow 0 as a "bare" value,
            // which enables them to get a VNForZero, and be propagated.
            if (op1->IsIntegralConst(0))
            {
                return op1;
            }
            break;

        default:
            break;
    }
    return tree;
}

//------------------------------------------------------------------------
// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
// (see ECMA III 3.55 and III.3.56).
//
// Arguments:
//    tree - The GT_MOD/GT_UMOD tree to morph
//
// Returns:
//    The morphed tree
//
// Notes:
//    For ARM64 we don't have a remainder instruction so this transform is
//    always done. For XARCH this transform is done if we know that magic
//    division will be used, in that case this transform allows CSE to
//    eliminate the redundant div from code like "x = a / 3; y = a % 3;".
//
//    This method will produce the above expression if 'a' and 'b' are
//    leaf nodes; otherwise, if either of them is not a leaf, it will spill
//    its value into a temporary variable. For example:
//    (x * 2 - 1) % (y + 1) ->  t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
//
GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
{
    // The incoming remainder node is reused as the "a / b" part of the result,
    // so retarget it to the division with the matching signedness.
    switch (tree->OperGet())
    {
        case GT_MOD:
            tree->SetOper(GT_DIV);
            break;

        case GT_UMOD:
            tree->SetOper(GT_UDIV);
            break;

        default:
            noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
            break;
    }

    var_types resultType = tree->gtType;
    GenTree*  divisor    = tree->gtOp2;
    GenTree*  dividend   = tree->gtOp1;

    // Each operand is needed twice in "a - (a / b) * b". A leaf is simply cloned
    // further down; anything else goes through fgMakeMultiUse first.
    if (dividend->OperIsLeaf())
    {
        if (lvaLocalVarRefCounted && dividend->OperIsLocal())
        {
            // The clone made below is an additional reference to this local.
            lvaIncRefCnts(dividend);
        }
    }
    else
    {
        dividend = fgMakeMultiUse(&tree->gtOp1);
    }

    if (divisor->OperIsLeaf())
    {
        if (lvaLocalVarRefCounted && divisor->OperIsLocal())
        {
            // The clone made below is an additional reference to this local.
            lvaIncRefCnts(divisor);
        }
    }
    else
    {
        divisor = fgMakeMultiUse(&tree->gtOp2);
    }

    // If an operand was assigned to a temp, the defining assignment lives inside
    // 'tree' (the division). The division must therefore run before the extra uses:
    // the multiply takes it as its first operand and evaluates in normal order,
    // while the subtract has it under its second operand and so must be flagged
    // for reverse evaluation.
    GenTree* divisorCopy = gtCloneExpr(divisor);
    GenTree* product     = gtNewOperNode(GT_MUL, resultType, tree, divisorCopy);
    assert(!product->IsReverseOp());

    GenTree* dividendCopy = gtCloneExpr(dividend);
    GenTree* difference   = gtNewOperNode(GT_SUB, resultType, dividendCopy, product);
    difference->gtFlags |= GTF_REVERSE_OPS;

#ifdef DEBUG
    difference->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif

    return difference;
}

//------------------------------------------------------------------------------
// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
//
//
// Arguments:
//    oper  - Operation to check
//
// Return Value:
//    True if the operation can be a root of a bitwise rotation tree; false otherwise.

bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
{
    // Only the two opers below are accepted as the root of a rotation pattern.
    switch (oper)
    {
        case GT_OR:
        case GT_XOR:
            return true;

        default:
            return false;
    }
}

//------------------------------------------------------------------------------
// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
//                                      an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
//
// Arguments:
//    tree  - tree to check for a rotation pattern
//
// Return Value:
//    An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
//
// Assumption:
//    The input is a GT_OR or a GT_XOR tree.
//
// Notes:
//    During global morph the input node itself is rewritten in place; otherwise a new
//    node is allocated so that the original tree is left untouched.
//    Nothing is recognized when LEGACY_BACKEND is defined.
//    For a GT_XOR root the variable-index patterns are accepted only when neither shift
//    index mask clears bit N (see the comment at the check below).

GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
{
#ifndef LEGACY_BACKEND
    //
    // Check for a rotation pattern, e.g.,
    //
    //                         OR                      ROL
    //                      /      \                   / \
    //                    LSH      RSZ      ->        x   y
    //                    / \      / \
    //                   x  AND   x  AND
    //                      / \      / \
    //                     y  31   ADD  31
    //                             / \
    //                            NEG 32
    //                             |
    //                             y
    // The patterns recognized:
    // (x << (y & M)) op (x >>> ((-y + N) & M))
    // (x >>> ((-y + N) & M)) op (x << (y & M))
    //
    // (x << y) op (x >>> (-y + N))
    // (x >>> (-y + N)) op (x << y)
    //
    // (x >>> (y & M)) op (x << ((-y + N) & M))
    // (x << ((-y + N) & M)) op (x >>> (y & M))
    //
    // (x >>> y) op (x << (-y + N))
    // (x << (-y + N)) op (x >>> y)
    //
    // (x << c1) op (x >>> c2)
    // (x >>> c1) op (x << c2)
    //
    // where
    // c1 and c2 are const
    // c1 + c2 == bitsize(x)
    // N == bitsize(x)
    // M is const
    // M & (N - 1) == N - 1
    // op is either | or ^
    // (when op is ^ and the index is not the c1/c2 form, M & N must also be non-zero)

    if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
    {
        // We can't do anything if the tree has assignments, calls, or volatile
        // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
        // thrown by the original tree will be thrown by the transformed tree as well.
        return tree;
    }

    genTreeOps oper = tree->OperGet();
    assert(fgOperIsBitwiseRotationRoot(oper));

    // Check if we have an LSH on one side of the OR and an RSZ on the other side.
    GenTreePtr op1            = tree->gtGetOp1();
    GenTreePtr op2            = tree->gtGetOp2();
    GenTreePtr leftShiftTree  = nullptr;
    GenTreePtr rightShiftTree = nullptr;
    if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
    {
        leftShiftTree  = op1;
        rightShiftTree = op2;
    }
    else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
    {
        leftShiftTree  = op2;
        rightShiftTree = op1;
    }
    else
    {
        return tree;
    }

    // Check if the trees representing the value to shift are identical.
    // We already checked that there are no side effects above.
    if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
    {
        GenTreePtr rotatedValue           = leftShiftTree->gtGetOp1();
        var_types  rotatedValueActualType = genActualType(rotatedValue->gtType);
        ssize_t    rotatedValueBitSize    = genTypeSize(rotatedValueActualType) * 8;
        noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
        GenTreePtr leftShiftIndex  = leftShiftTree->gtGetOp2();
        GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();

        // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
        // shouldn't be masked for the transformation to be valid. If additional
        // higher bits are not masked, the transformation is still valid since the result
        // of MSIL shift instructions is unspecified if the shift amount is greater or equal
        // than the width of the value being shifted.
        // A mask of -1 (all bits set) stands for "no mask present".
        ssize_t minimalMask    = rotatedValueBitSize - 1;
        ssize_t leftShiftMask  = -1;
        ssize_t rightShiftMask = -1;

        if ((leftShiftIndex->OperGet() == GT_AND))
        {
            if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
            {
                leftShiftMask  = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
                leftShiftIndex = leftShiftIndex->gtGetOp1();
            }
            else
            {
                return tree;
            }
        }

        if ((rightShiftIndex->OperGet() == GT_AND))
        {
            if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
            {
                rightShiftMask  = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
                rightShiftIndex = rightShiftIndex->gtGetOp1();
            }
            else
            {
                return tree;
            }
        }

        if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
        {
            // The shift index is overmasked, e.g., we have
            // something like (x << (y & 15)) or
            // (x >> ((32 - y) & 15)) with 32 bit x.
            // The transformation is not valid.
            return tree;
        }

        GenTreePtr shiftIndexWithAdd    = nullptr;
        GenTreePtr shiftIndexWithoutAdd = nullptr;
        genTreeOps rotateOp             = GT_NONE;
        GenTreePtr rotateIndex          = nullptr;

        // The side whose index has the form (-y + N) determines the rotation direction:
        // if it is the left shift, the plain index y belongs to the right shift (ROR),
        // and vice versa (ROL).
        if (leftShiftIndex->OperGet() == GT_ADD)
        {
            shiftIndexWithAdd    = leftShiftIndex;
            shiftIndexWithoutAdd = rightShiftIndex;
            rotateOp             = GT_ROR;
        }
        else if (rightShiftIndex->OperGet() == GT_ADD)
        {
            shiftIndexWithAdd    = rightShiftIndex;
            shiftIndexWithoutAdd = leftShiftIndex;
            rotateOp             = GT_ROL;
        }

        if (shiftIndexWithAdd != nullptr)
        {
            if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
            {
                if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
                {
                    if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
                    {
                        if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
                        {
                            // We found one of these patterns:
                            // (x << (y & M)) | (x >>> ((-y + N) & M))
                            // (x << y) | (x >>> (-y + N))
                            // (x >>> (y & M)) | (x << ((-y + N) & M))
                            // (x >>> y) | (x << (-y + N))
                            // where N == bitsize(x), M is const, and
                            // M & (N - 1) == N - 1
                            CLANG_FORMAT_COMMENT_ANCHOR;

                            // For an XOR root the two shifted halves must never overlap. If either
                            // mask clears bit N, an index whose low bits are all zero can make both
                            // shifts a well-defined shift by 0, so the original tree evaluates to
                            // x ^ x == 0 while a rotate by 0 yields x. When both masks keep bit N
                            // (or no mask is present), one of the two shift amounts is >= N in that
                            // situation and the MSIL result is unspecified, so the rewrite is still
                            // allowed. An OR root is unaffected because x | x == x.
                            if ((oper == GT_XOR) && (((leftShiftMask & rotatedValueBitSize) == 0) ||
                                                     ((rightShiftMask & rotatedValueBitSize) == 0)))
                            {
                                return tree;
                            }

#ifndef _TARGET_64BIT_
                            if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
                            {
                                // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
                                // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
                                // to add helpers for GT_ROL and GT_ROR.
                                return tree;
                            }
#endif

                            rotateIndex = shiftIndexWithoutAdd;
                        }
                    }
                }
            }
        }
        else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
        {
            if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
            {
                // We found this pattern:
                // (x << c1) | (x >>> c2)
                // where c1 and c2 are const and c1 + c2 == bitsize(x)
                rotateOp    = GT_ROL;
                rotateIndex = leftShiftIndex;
            }
        }

        if (rotateIndex != nullptr)
        {
            noway_assert(GenTree::OperIsRotate(rotateOp));

            unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;

            // We can use the same tree only during global morph; reusing the tree in a later morph
            // may invalidate value numbers.
            if (fgGlobalMorph)
            {
                tree->gtOp.gtOp1 = rotatedValue;
                tree->gtOp.gtOp2 = rotateIndex;
                tree->ChangeOper(rotateOp);

                unsigned childFlags = 0;
                for (GenTree* op : tree->Operands())
                {
                    childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
                }

                // The parent's flags should be a superset of its operands' flags
                noway_assert((inputTreeEffects & childFlags) == childFlags);
            }
            else
            {
                tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
                noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
            }

            return tree;
        }
    }
#endif // LEGACY_BACKEND
    return tree;
}

#if !CPU_HAS_FP_SUPPORT
//------------------------------------------------------------------------
// fgMorphToEmulatedFP: Rewrite a floating-point operation as a helper call.
//
// Arguments:
//    tree - the node to examine
//
// Return Value:
//    "tree" unchanged when neither its type nor the type of its first operand is
//    floating point, or when its operator is one that is left alone (GT_ASG, GT_IND,
//    GT_LIST, GT_ADDR, GT_COMMA, and unary GT_RETURN); otherwise the result of
//    fgMorphIntoHelperCall for the selected CPX_R4_* / CPX_R8_* helper.
//
// Notes:
//    Only compiled when CPU_HAS_FP_SUPPORT is zero.
//    fgPtrArgCntCur is raised by the number of argument slots while the call is
//    built (fgPtrArgCntMax is bumped if it is exceeded) and lowered again by the
//    same amount on every path that returns after the increment.
//
GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
{

    genTreeOps oper = tree->OperGet();
    var_types  typ  = tree->TypeGet();
    GenTreePtr op1  = tree->gtOp.gtOp1;
    GenTreePtr op2  = tree->gtGetOp2IfPresent();

    /*
        We have to use helper calls for all FP operations:

            FP operators that operate on FP values
            casts to and from FP
            comparisons of FP values
     */

    if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
    {
        int        helper;
        GenTreePtr args;
        size_t     argc = genTypeStSz(typ);

        /* Not all FP operations need helper calls */

        switch (oper)
        {
            case GT_ASG:
            case GT_IND:
            case GT_LIST:
            case GT_ADDR:
            case GT_COMMA:
                return tree;
        }

#ifdef DEBUG

        /* If the result isn't FP, it better be a compare or cast */

        if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
        {
            gtDispTree(tree);
        }

        noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
#endif

        /* Keep track of how many arguments we're passing */

        fgPtrArgCntCur += argc;

        /* Is this a binary operator? */

        if (op2)
        {
            /* Add the second operand to the argument count */

            fgPtrArgCntCur += argc;
            argc *= 2;

            /* What kind of an operator do we have? */

            switch (oper)
            {
                case GT_ADD:
                    helper = CPX_R4_ADD;
                    break;
                case GT_SUB:
                    helper = CPX_R4_SUB;
                    break;
                case GT_MUL:
                    helper = CPX_R4_MUL;
                    break;
                case GT_DIV:
                    helper = CPX_R4_DIV;
                    break;
                // case GT_MOD: helper = CPX_R4_REM; break;

                case GT_EQ:
                    helper = CPX_R4_EQ;
                    break;
                case GT_NE:
                    helper = CPX_R4_NE;
                    break;
                case GT_LT:
                    helper = CPX_R4_LT;
                    break;
                case GT_LE:
                    helper = CPX_R4_LE;
                    break;
                case GT_GE:
                    helper = CPX_R4_GE;
                    break;
                case GT_GT:
                    helper = CPX_R4_GT;
                    break;

                default:
#ifdef DEBUG
                    gtDispTree(tree);
#endif
                    noway_assert(!"unexpected FP binary op");
                    break;
            }

            // Note that the second operand is placed first in the argument list.
            args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
        }
        else
        {
            switch (oper)
            {
                case GT_RETURN:
                    // Nothing to rewrite; undo the argument count adjustment made above so
                    // that this early exit leaves fgPtrArgCntCur as it found it.
                    fgPtrArgCntCur -= argc;
                    return tree;

                case GT_CAST:
                    // No "break" here: control continues into the GT_NEG case after the assert.
                    noway_assert(!"FP cast");

                case GT_NEG:
                    helper = CPX_R4_NEG;
                    break;

                default:
#ifdef DEBUG
                    gtDispTree(tree);
#endif
                    noway_assert(!"unexpected FP unary op");
                    break;
            }

            args = gtNewArgList(tree->gtOp.gtOp1);
        }

        /* If we have double result/operands, modify the helper */

        // The R8 variant of each helper is expected to directly follow its R4 variant,
        // which is what the asserts below verify.
        if (typ == TYP_DOUBLE)
        {
            noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
            noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
            noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
            noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
            noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);

            helper++;
        }
        else
        {
            noway_assert(tree->OperIsCompare());

            noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
            noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
            noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
            noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
            noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
            noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
        }

        tree = fgMorphIntoHelperCall(tree, helper, args);

        if (fgPtrArgCntMax < fgPtrArgCntCur)
        {
            JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
            fgPtrArgCntMax = fgPtrArgCntCur;
        }

        fgPtrArgCntCur -= argc;
        return tree;
    }

    // Not a floating-point operation: leave the tree alone.
    return tree;
}
#endif

/*****************************************************************************
 *
 *  Morph "tree" into the form expected by code generation. The node that is
 *  returned may differ from the one passed in, so callers must always use
 *  the return value.
 */

GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
{
    noway_assert(tree);
    noway_assert(tree->gtOper != GT_STMT);

#ifdef DEBUG
    // Optional break-in on a specific tree id (verbose mode only).
    if (verbose && ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID))
    {
        noway_assert(!"JitBreakMorphTree hit");
    }

    int thisMorphNum = 0;
    if (verbose && treesBeforeAfterMorph)
    {
        thisMorphNum = morphNum++;
        printf("\nfgMorphTree (before %d):\n", thisMorphNum);
        gtDispTree(tree);
    }
#endif

    // Implicit byref argument rewrites are applied ahead of morphing, and only
    // during global morph.
    if (fgGlobalMorph && fgMorphImplicitByRefArgs(tree))
    {
#ifdef DEBUG
        if (verbose && treesBeforeAfterMorph)
        {
            printf("\nfgMorphTree (%d), after implicit-byref rewrite:\n", thisMorphNum);
            gtDispTree(tree);
        }
#endif
    }

#ifdef DEBUG

    // fgMorphTree() may hand back a different node, and callers are responsible for
    // storing the result. Under this stress mode the incoming node is always replaced
    // by a copy so that stale references to the original are flushed out.
    if (compStressCompile(STRESS_GENERIC_CHECK, 0))
    {
        GenTreePtr clone;

#ifdef SMALL_TREE_NODES
        if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
        {
            clone = gtNewLargeOperNode(GT_ADD, TYP_INT);
        }
        else
#endif
        {
            clone = new (this, GT_CALL) GenTreeCall(TYP_INT);
        }

        clone->CopyFrom(tree, this);

#if defined(LATE_DISASM)
        // CopyFrom() treats GT_CNS_INT as a small node and skips these fields,
        // so carry the handle information over by hand.
        if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
        {
            clone->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
            clone->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
        }
#endif

        DEBUG_DESTROY_NODE(tree);
        tree = clone;
    }
#endif // DEBUG

    if (fgGlobalMorph)
    {
        // A node must not be morphed twice.
        assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");

#if LOCAL_ASSERTION_PROP
        // Propagate active assertions into the tree before it is morphed.
        if (optLocalAssertionProp && (optAssertionCount > 0))
        {
            // optAssertionProp returns a non-null tree each time it propagated something;
            // keep applying it to the latest result until it reports no further change.
            for (GenTreePtr propagated = tree; propagated != nullptr;
                 propagated            = optAssertionProp(apFull, tree, nullptr))
            {
                tree = propagated;
            }
            noway_assert(tree != nullptr);
        }
        PREFAST_ASSUME(tree != nullptr);
#endif
    }

    // Remember the tree as it was before morphing; fgMorphTreeDone needs it.
    GenTreePtr oldTree = tree;

    // Dispatch on the operator kind of the node.
    const unsigned operKind = tree->OperKind();

    if (operKind & GTK_CONST)
    {
        // Constant node
        tree = fgMorphConst(tree);
    }
    else if (operKind & GTK_LEAF)
    {
        // Leaf node
        tree = fgMorphLeaf(tree);
    }
    else if (operKind & GTK_SMPOP)
    {
        // 'Simple' unary/binary operator
        tree = fgMorphSmpOp(tree, mac);
    }
    else
    {
        // Everything else is a special operator handled case by case.
        switch (tree->OperGet())
        {
            case GT_FIELD:
                tree = fgMorphField(tree, mac);
                break;

            case GT_CALL:
                tree = fgMorphCall(tree->AsCall());
                break;

            case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
            case GT_SIMD_CHK:
#endif // FEATURE_SIMD
            {
                fgSetRngChkTarget(tree);

                GenTreeBoundsChk* const check = tree->AsBoundsChk();
                check->gtIndex                = fgMorphTree(check->gtIndex);
                check->gtArrLen               = fgMorphTree(check->gtArrLen);

                // An index of the form comma(throw, x) replaces the whole check;
                // in every other case the check node itself is returned.
                if (!optValnumCSE_phase && fgIsCommaThrow(check->gtIndex))
                {
                    tree = check->gtIndex;
                }

                // Fold the operands' effect flags into the check node.
                check->gtFlags |= (check->gtIndex->gtFlags & GTF_ALL_EFFECT);
                check->gtFlags |= (check->gtArrLen->gtFlags & GTF_ALL_EFFECT);
            }
            break;

            case GT_ARR_ELEM:
                tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
                tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;

                // Morph each index expression in rank order.
                for (unsigned dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
                {
                    tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
                    tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
                }

                if (fgGlobalMorph)
                {
                    fgSetRngChkTarget(tree, false);
                }
                break;

            case GT_ARR_OFFSET:
                // Operands are morphed in the order: offset, index, array object.
                tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
                tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;

                tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
                tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;

                tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
                tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;

                if (fgGlobalMorph)
                {
                    fgSetRngChkTarget(tree, false);
                }
                break;

            case GT_CMPXCHG:
                tree->gtCmpXchg.gtOpLocation  = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
                tree->gtCmpXchg.gtOpValue     = fgMorphTree(tree->gtCmpXchg.gtOpValue);
                tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
                break;

            case GT_STORE_DYN_BLK:
                // The store form has a data operand in addition to address and size.
                tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
                __fallthrough;
            case GT_DYN_BLK:
                tree->gtDynBlk.Addr()        = fgMorphTree(tree->gtDynBlk.Addr());
                tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
                break;

            default:
#ifdef DEBUG
                gtDispTree(tree);
#endif
                noway_assert(!"unexpected operator");
                break;
        }
    }

    fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));

    return tree;
}

#if LOCAL_ASSERTION_PROP
//------------------------------------------------------------------------
// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
{
    /* All dependent assertions are killed here */

    ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));

    if (killed)
    {
        AssertionIndex index = optAssertionCount;
        while (killed && (index > 0))
        {
            if (BitVecOps::IsMember(apTraits, killed, index - 1))
            {
#ifdef DEBUG
                AssertionDsc* curAssertion = optGetAssertion(index);
                noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
                             ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
                if (verbose)
                {
                    printf("\nThe assignment ");
                    printTreeID(tree);
                    printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
                    optPrintAssertion(curAssertion);
                }
#endif
                // Remove this bit from the killed mask
                BitVecOps::RemoveElemD(apTraits, killed, index - 1);

                optAssertionRemove(index);
            }

            index--;
        }

        // killed mask should now be zero
        noway_assert(BitVecOps::IsEmpty(apTraits, killed));
    }
}
//------------------------------------------------------------------------
// fgKillDependentAssertions: Remove all assertions affected by a change to lclNum.
//
// Arguments:
//    lclNum - The varNum of the lclVar for which we're killing assertions.
//    tree   - (DEBUG only) the tree responsible for killing its assertions.
//
// Notes:
//    A promoted struct also invalidates each of its field locals, and a struct
//    field local also invalidates its parent struct local.
//    The per-local work is delegated to fgKillDependentAssertionsSingle.
//
void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
{
    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varDsc->lvPromoted)
    {
        noway_assert(varTypeIsStruct(varDsc));

        // Field locals first ...
        unsigned fieldLcl = varDsc->lvFieldLclStart;
        while (fieldLcl < varDsc->lvFieldLclStart + varDsc->lvFieldCnt)
        {
            fgKillDependentAssertionsSingle(fieldLcl DEBUGARG(tree));
            ++fieldLcl;
        }

        // ... then the struct local itself.
        fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
        return;
    }

    const bool isStructField = varDsc->lvIsStructField;

    // The local itself is always killed.
    fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));

    if (isStructField)
    {
        // A field local additionally kills its parent struct.
        fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
    }
}
#endif // LOCAL_ASSERTION_PROP

/*****************************************************************************
 *
 *  Finish morphing a tree node. Expected to run exactly once per node.
 *  Outside of global morph this only produces the optional DEBUG dump.
 *  In DEBUG builds GTF_DEBUG_NODE_MORPHED is checked on entry and set on
 *  exit, which enforces the "morphed only once" invariant.
 *  When LOCAL_ASSERTION_PROP is enabled and local assertion propagation is
 *  active, assertions invalidated by a local definition are killed and any
 *  assertion created by the tree is generated.
 *
 */

void Compiler::fgMorphTreeDone(GenTreePtr tree,
                               GenTreePtr oldTree /* == NULL */
                               DEBUGARG(int morphNum))
{
#ifdef DEBUG
    if (verbose && treesBeforeAfterMorph)
    {
        printf("\nfgMorphTree (after %d):\n", morphNum);
        gtDispTree(tree);
        printf(""); // in our logic this causes a flush
    }
#endif

    if (!fgGlobalMorph)
    {
        return;
    }

    const bool treeWasReplaced = (oldTree != nullptr) && (oldTree != tree);

    if (treeWasReplaced)
    {
        // The replacement node is expected to have been marked as morphed already.
        assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");

#ifdef DEBUG
        TransferTestDataToNode(oldTree, tree);
#endif
    }
    else
    {
        // Same node as before: it must not carry the morphed mark yet.
        assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
    }

    // Constant nodes skip the assertion bookkeeping entirely.
    if ((tree->OperKind() & GTK_CONST) == 0)
    {
#if LOCAL_ASSERTION_PROP
        if (optLocalAssertionProp)
        {
            // With active assertions, a definition of a local kills the assertions
            // that depend on it.
            if (optAssertionCount > 0)
            {
                GenTreeLclVarCommon* lclVarTree = nullptr;
                if (tree->DefinesLocal(this, &lclVarTree))
                {
                    unsigned lclNum = lclVarTree->gtLclNum;
                    noway_assert(lclNum < lvaCount);
                    fgKillDependentAssertions(lclNum DEBUGARG(tree));
                }
            }

            // Record any new assertion that this tree establishes.
            optAssertionGen(tree);
        }
#endif // LOCAL_ASSERTION_PROP
    }

#ifdef DEBUG
    // Flag the node so that a second morph attempt is caught.
    tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif
}

/*****************************************************************************
 *
 *  Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
 *  Returns true if we modified the flow graph
 */

bool Compiler::fgFoldConditional(BasicBlock* block)
{
    bool result = false;

    // We don't want to make any code unreachable
    if (opts.compDbgCode || opts.MinOpts())
    {
        return false;
    }

    if (block->bbJumpKind == BBJ_COND)
    {
        noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);

        GenTreePtr stmt = block->bbTreeList->gtPrev;

        noway_assert(stmt->gtNext == nullptr);

        if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
        {
            noway_assert(fgRemoveRestOfBlock);

            /* Unconditional throw - transform the basic block into a BBJ_THROW */
            fgConvertBBToThrowBB(block);

            /* Remove 'block' from the predecessor list of 'block->bbNext' */
            fgRemoveRefPred(block->bbNext, block);

            /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
            fgRemoveRefPred(block->bbJumpDest, block);

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
            }
#endif
            goto DONE_COND;
        }

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);

        /* Did we fold the conditional */

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
        GenTreePtr cond;
        cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;

        if (cond->OperKind() & GTK_CONST)
        {
            /* Yupee - we folded the conditional!
             * Remove the conditional statement */

            noway_assert(cond->gtOper == GT_CNS_INT);
            noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));

            /* remove the statement from bbTreelist - No need to update
             * the reference counts since there are no lcl vars */
            fgRemoveStmt(block, stmt);

            // block is a BBJ_COND that we are folding the conditional for
            // bTaken is the path that will always be taken from block
            // bNotTaken is the path that will never be taken from block
            //
            BasicBlock* bTaken;
            BasicBlock* bNotTaken;

            if (cond->gtIntCon.gtIconVal != 0)
            {
                /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
                block->bbJumpKind = BBJ_ALWAYS;
                bTaken            = block->bbJumpDest;
                bNotTaken         = block->bbNext;
            }
            else
            {
                /* Unmark the loop if we are removing a backwards branch */
                /* dest block must also be marked as a loop head and     */
                /* We must be able to reach the backedge block           */
                if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
                    fgReachable(block->bbJumpDest, block))
                {
                    optUnmarkLoopBlocks(block->bbJumpDest, block);
                }

                /* JTRUE 0 - transform the basic block into a BBJ_NONE   */
                block->bbJumpKind = BBJ_NONE;
                noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
                bTaken    = block->bbNext;
                bNotTaken = block->bbJumpDest;
            }

            if (fgHaveValidEdgeWeights)
            {
                // We are removing an edge from block to bNotTaken
                // and we have already computed the edge weights, so
                // we will try to adjust some of the weights
                //
                flowList*   edgeTaken = fgGetPredForBlock(bTaken, block);
                BasicBlock* bUpdated  = nullptr; // non-NULL if we updated the weight of an internal block

                // We examine the taken edge (block -> bTaken)
                // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
                // else if bTaken has valid profile weight and block does not we try to adjust block's weight
                // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
                //
                if (block->hasProfileWeight())
                {
                    // The edge weights for (block -> bTaken) are 100% of block's weight
                    edgeTaken->flEdgeWeightMin = block->bbWeight;
                    edgeTaken->flEdgeWeightMax = block->bbWeight;

                    if (!bTaken->hasProfileWeight())
                    {
                        if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
                        {
                            // Update the weight of bTaken
                            bTaken->inheritWeight(block);
                            bUpdated = bTaken;
                        }
                    }
                }
                else if (bTaken->hasProfileWeight())
                {
                    if (bTaken->countOfInEdges() == 1)
                    {
                        // There is only one in edge to bTaken
                        edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
                        edgeTaken->flEdgeWeightMax = bTaken->bbWeight;

                        // Update the weight of block
                        block->inheritWeight(bTaken);
                        bUpdated = block;
                    }
                }

                if (bUpdated != nullptr)
                {
                    flowList* edge;
                    // Now fix the weights of the edges out of 'bUpdated'
                    switch (bUpdated->bbJumpKind)
                    {
                        case BBJ_NONE:
                            edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            break;
                        case BBJ_COND:
                            edge                  = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            __fallthrough;
                        case BBJ_ALWAYS:
                            edge                  = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
                            edge->flEdgeWeightMax = bUpdated->bbWeight;
                            break;
                        default:
                            // We don't handle BBJ_SWITCH
                            break;
                    }
                }
            }

            /* modify the flow graph */

            /* Remove 'block' from the predecessor list of 'bNotTaken' */
            fgRemoveRefPred(bNotTaken, block);

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a %s", block->bbNum,
                       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
                if (block->bbJumpKind == BBJ_ALWAYS)
                {
                    printf(" to BB%02u", block->bbJumpDest->bbNum);
                }
                printf("\n");
            }
#endif

            /* if the block was a loop condition we may have to modify
             * the loop table */

            for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
            {
                /* Some loops may have been already removed by
                 * loop unrolling or conditional folding */

                if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
                {
                    continue;
                }

                /* We are only interested in the loop bottom */

                if (optLoopTable[loopNum].lpBottom == block)
                {
                    if (cond->gtIntCon.gtIconVal == 0)
                    {
                        /* This was a bogus loop (condition always false)
                         * Remove the loop from the table */

                        optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
#ifdef DEBUG
                        if (verbose)
                        {
                            printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
                                   optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
                        }
#endif
                    }
                }
            }
        DONE_COND:
            result = true;
        }
    }
    else if (block->bbJumpKind == BBJ_SWITCH)
    {
        noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);

        GenTreePtr stmt = block->bbTreeList->gtPrev;

        noway_assert(stmt->gtNext == nullptr);

        if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
        {
            noway_assert(fgRemoveRestOfBlock);

            /* Unconditional throw - transform the basic block into a BBJ_THROW */
            fgConvertBBToThrowBB(block);

            /* update the flow graph */

            unsigned     jumpCnt = block->bbJumpSwt->bbsCount;
            BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;

            for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
            {
                BasicBlock* curJump = *jumpTab;

                /* Remove 'block' from the predecessor list of 'curJump' */
                fgRemoveRefPred(curJump, block);
            }

#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
            }
#endif
            goto DONE_SWITCH;
        }

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);

        /* Did we fold the conditional */

        noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
        GenTreePtr cond;
        cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;

        if (cond->OperKind() & GTK_CONST)
        {
            /* Yupee - we folded the conditional!
             * Remove the conditional statement */

            noway_assert(cond->gtOper == GT_CNS_INT);

            /* remove the statement from bbTreelist - No need to update
             * the reference counts since there are no lcl vars */
            fgRemoveStmt(block, stmt);

            /* modify the flow graph */

            /* Find the actual jump target */
            unsigned switchVal;
            switchVal = (unsigned)cond->gtIntCon.gtIconVal;
            unsigned jumpCnt;
            jumpCnt = block->bbJumpSwt->bbsCount;
            BasicBlock** jumpTab;
            jumpTab = block->bbJumpSwt->bbsDstTab;
            bool foundVal;
            foundVal = false;

            for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
            {
                BasicBlock* curJump = *jumpTab;

                assert(curJump->countOfInEdges() > 0);

                // If val matches switchVal or we are at the last entry and
                // we never found the switch value then set the new jump dest

                if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
                {
                    if (curJump != block->bbNext)
                    {
                        /* transform the basic block into a BBJ_ALWAYS */
                        block->bbJumpKind = BBJ_ALWAYS;
                        block->bbJumpDest = curJump;

                        // A GC poll is only needed when jumping backwards; if the target has a
                        // higher block number than 'block', clear the "needs GC poll" flag.
                        if (curJump->bbNum > block->bbNum)
                        {
                            block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                        }
                    }
                    else
                    {
                        /* transform the basic block into a BBJ_NONE */
                        block->bbJumpKind = BBJ_NONE;
                        block->bbFlags &= ~BBF_NEEDS_GCPOLL;
                    }
                    foundVal = true;
                }
                else
                {
                    /* Remove 'block' from the predecessor list of 'curJump' */
                    fgRemoveRefPred(curJump, block);
                }
            }
#ifdef DEBUG
            if (verbose)
            {
                printf("\nConditional folded at BB%02u\n", block->bbNum);
                printf("BB%02u becomes a %s", block->bbNum,
                       block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
                if (block->bbJumpKind == BBJ_ALWAYS)
                {
                    printf(" to BB%02u", block->bbJumpDest->bbNum);
                }
                printf("\n");
            }
#endif
        DONE_SWITCH:
            result = true;
        }
    }
    return result;
}

//------------------------------------------------------------------------
// fgMorphBlockStmt: Morph one statement that lives in the given block.
//
// Arguments:
//    block - the block containing 'stmt' (must not be null)
//    stmt  - the statement whose expression is morphed (must not be null)
//    msg   - (DEBUG builds only) prefix printed in the verbose dump
//
// Return Value:
//    true  when 'stmt' itself ended up removed from the block;
//    false when 'stmt' is still in the block (other statements may have been removed).
//
// Notes:
//    Unlike fgMorphStmts(), which should only run once per block, this can be
//    called at any time. It sets compCurBB and compCurStmt, and it leaves
//    fgRemoveRestOfBlock cleared whenever it acted on that flag.
//
bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
{
    assert(block != nullptr);
    assert(stmt != nullptr);

    compCurBB   = block;
    compCurStmt = stmt;

    GenTree* morph = fgMorphTree(stmt->gtStmtExpr);

    // Bug 1106830: while the CSE phase is running, morph->gtOp.gtOp2 must not be
    // dropped because it may hold CSE expressions; removing it trips a noway_assert
    // in OptCSE.cpp when the removed CSE ref is looked up (via gtFindLink).
    if (!optValnumCSE_phase)
    {
        // A GT_COMMA whose first operand throws unconditionally: keep only the throw.
        if (fgIsCommaThrow(morph, true))
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("Folding a top-level fgIsCommaThrow stmt\n");
                printf("Removing op2 as unreachable:\n");
                gtDispTree(morph->gtOp.gtOp2);
                printf("\n");
            }
#endif
            // The call becomes the statement's expression
            morph = morph->gtOp.gtOp1;
            noway_assert(morph->gtOper == GT_CALL);
        }

        // The statement root itself may be a throw
        if (fgIsThrow(morph))
        {
#ifdef DEBUG
            if (verbose)
            {
                printf("We have a top-level fgIsThrow stmt\n");
                printf("Removing the rest of block as unreachable:\n");
            }
#endif
            noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
            fgRemoveRestOfBlock = true;
        }
    }

    stmt->gtStmtExpr = morph;

    // Morphing can add new lclVar references; account for them when ref counting is active.
    if (lvaLocalVarRefCounted)
    {
        lvaRecursiveIncRefCounts(morph);
    }

    // First possibility: the whole statement can simply go away.
    bool stmtRemoved = fgCheckRemoveStmt(block, stmt);

    // Second possibility: it was the final statement of a conditional branch that just got folded.
    // A fold that turned the block into a BBJ_THROW does not count as removing the statement.
    if (!stmtRemoved && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock && fgFoldConditional(block))
    {
        stmtRemoved = (block->bbJumpKind != BBJ_THROW);
    }

    if (!stmtRemoved)
    {
        // Recompute the evaluation order (later code does not expect, e.g., constants as op1) ...
        gtSetStmtInfo(stmt);

        // ... and rebuild the node links of the statement.
        fgSetStmtSeq(stmt);
    }

#ifdef DEBUG
    if (verbose)
    {
        const char* action = stmtRemoved ? "removed" : "morphed";
        printf("%s %s tree:\n", msg, action);
        gtDispTree(morph);
        printf("\n");
    }
#endif

    if (!fgRemoveRestOfBlock)
    {
        return stmtRemoved;
    }

    // Everything after 'stmt' is unreachable: drop the trailing statements.
    GenTreeStmt* deadStmt = stmt->getNextStmt();
    while (deadStmt != nullptr)
    {
        fgRemoveStmt(block, deadStmt);
        deadStmt = deadStmt->getNextStmt();
    }

    // The block now always throws, so it no longer flows to its successors.
    fgRemoveBlockAsPred(block);

    // For compDbgCode an empty BBJ_NONE block is prepended as the first block;
    // that internal first block must not be turned into a throw block.
    const bool isInternalFirstBB = (block == fgFirstBB) && ((fgFirstBB->bbFlags & BBF_INTERNAL) != 0);
    if (!isInternalFirstBB)
    {
        fgConvertBBToThrowBB(block);
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
    }
#endif
    fgRemoveRestOfBlock = false;

    return stmtRemoved;
}

/*****************************************************************************
 *
 *  Morph the statements of the given block.
 *  This function should be called just once for a block. Use fgMorphBlockStmt()
 *  for reentrant calls.
 *
 *  Arguments:
 *     block - the basic block whose statements are morphed
 *     mult  - [out] reset to false on entry; set to true (only when OPT_MULT_ADDSUB
 *             is enabled) when a GT_ASG_ADD/GT_ASG_SUB statement follows a statement
 *             with the same operator
 *     lnot  - [out] reset to false on entry; not written anywhere else in this function
 *     loadw - [out] reset to false on entry; set to true when a GT_ASG_OR statement
 *             follows a GT_ASG statement
 *
 *  Notes:
 *     fgRemoveRestOfBlock is cleared on entry and again on exit. While it is set,
 *     every remaining statement is removed and the block is converted to a throw block.
 *     The 'mult' / 'loadw' checks are skipped for statements that were removed, for a
 *     folded conditional, and when ehBlockHasExnFlowDsc(block) is true.
 */

void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
{
    fgRemoveRestOfBlock = false;

    noway_assert(fgExpandInline == false);

    /* Make the current basic block address available globally */

    compCurBB = block;

    *mult = *lnot = *loadw = false;

    fgCurrentlyInUseArgTemps = hashBv::Create(this);

    // 'prev' tracks the (possibly already morphed) expression of the previously visited
    // statement; it is updated by the loop increment, so it advances on 'continue' too.
    GenTreeStmt* stmt = block->firstStmt();
    GenTreePtr   prev = nullptr;
    for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
    {
        noway_assert(stmt->gtOper == GT_STMT);

        // An earlier statement was found to throw unconditionally: drop everything after it.
        if (fgRemoveRestOfBlock)
        {
            fgRemoveStmt(block, stmt);
            continue;
        }
#ifdef FEATURE_SIMD
        // Only TYP_FLOAT assignments are handed to the SIMD field-assignment combiner,
        // and only when not compiling with MinOpts.
        if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
        {
            fgMorphCombineSIMDFieldAssignments(block, stmt);
        }
#endif

        fgMorphStmt     = stmt;
        compCurStmt     = stmt;
        GenTreePtr tree = stmt->gtStmtExpr;

#ifdef DEBUG
        compCurStmtNum++;
        if (stmt == block->bbTreeList)
        {
            block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
        }

        // The hash is only needed (and only computed) for the verbose before/after comparison below.
        unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);

        if (verbose)
        {
            printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
            gtDispTree(tree);
        }
#endif

        /* Morph this statement tree */

        GenTreePtr morph = fgMorphTree(tree);

        // mark any outgoing arg temps as free so we can reuse them in the next statement.

        fgCurrentlyInUseArgTemps->ZeroAll();

        // Has fgMorphStmt been sneakily changed ?

        if (stmt->gtStmtExpr != tree)
        {
            /* This must be tailcall. Ignore 'morph' and carry on with
               the tail-call node */

            morph = stmt->gtStmtExpr;
            noway_assert(compTailCallUsed);
            noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
            noway_assert(stmt->gtNextStmt == nullptr);

            GenTreeCall* call = morph->AsCall();
            // Could either be
            //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
            //     a jmp.
            noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
                         (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
                          (compCurBB->bbFlags & BBF_HAS_JMP)));
        }
        else if (block != compCurBB)
        {
            /* This must be a tail call that caused a GCPoll to get
               injected.  We haven't actually morphed the call yet
               but the flag still got set, clear it here...  */
            CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef DEBUG
            tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif

            noway_assert(compTailCallUsed);
            noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
            noway_assert(stmt->gtNextStmt == nullptr);

            // NOTE(review): the asserts above inspect 'tree' while 'call' is taken from 'morph';
            // presumably the two are the same node on this path - confirm.
            GenTreeCall* call = morph->AsCall();

            // Could either be
            //   - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
            //   - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
            //     a jmp.
            noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
                         (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
                          (compCurBB->bbFlags & BBF_HAS_JMP)));
        }

#ifdef DEBUG
        if (compStressCompile(STRESS_CLONE_EXPR, 30))
        {
            // Clone all the trees to stress gtCloneExpr()

            if (verbose)
            {
                printf("\nfgMorphTree (stressClone from):\n");
                gtDispTree(morph);
            }

            morph = gtCloneExpr(morph);
            noway_assert(morph);

            if (verbose)
            {
                printf("\nfgMorphTree (stressClone to):\n");
                gtDispTree(morph);
            }
        }

        /* If the hash value changes, we modified the tree during morphing */
        if (verbose)
        {
            unsigned newHash = gtHashValue(morph);
            if (newHash != oldHash)
            {
                printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
                gtDispTree(morph);
            }
        }
#endif

        /* Check for morph as a GT_COMMA with an unconditional throw */
        if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
        {
            /* Use the call as the new stmt */
            morph = morph->gtOp.gtOp1;
            noway_assert(morph->gtOper == GT_CALL);
            noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);

            fgRemoveRestOfBlock = true;
        }

        // Install the morphed expression; 'tree' now refers to it for the checks below.
        stmt->gtStmtExpr = tree = morph;

        noway_assert(fgPtrArgCntCur == 0);

        // The remaining statements are removed at the top of the following iterations.
        if (fgRemoveRestOfBlock)
        {
            continue;
        }

        /* Has the statement been optimized away */

        if (fgCheckRemoveStmt(block, stmt))
        {
            continue;
        }

        /* Check if this block ends with a conditional branch that can be folded */

        if (fgFoldConditional(block))
        {
            continue;
        }

        // The pattern detection below is skipped for blocks for which
        // ehBlockHasExnFlowDsc() reports true.
        if (ehBlockHasExnFlowDsc(block))
        {
            continue;
        }

#if OPT_MULT_ADDSUB

        /* Note whether we have two or more +=/-= operators in a row */

        if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
        {
            if (prev && prev->gtOper == tree->gtOper)
            {
                *mult = true;
            }
        }

#endif

        /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */

        if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
        {
            *loadw = true;
        }
    }

    // The last processed statement throws unconditionally (the flag was set on the final
    // iterations and never consumed): turn the block into a throw block.
    if (fgRemoveRestOfBlock)
    {
        if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
        {
            // bbTreeList->gtPrev is the last statement of the block (its gtNext is null).
            GenTreePtr first = block->bbTreeList;
            noway_assert(first);
            GenTreePtr last = first->gtPrev;
            noway_assert(last && last->gtNext == nullptr);
            GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;

            // If the block still ends with its GT_JTRUE / GT_SWITCH, replace that node
            // by its (re-morphed) operand, since the block will no longer branch.
            if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
                ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
            {
                GenTreePtr op1 = lastStmt->gtOp.gtOp1;

                if (op1->OperKind() & GTK_RELOP)
                {
                    /* Unmark the comparison node with GTF_RELOP_JMP_USED */
                    op1->gtFlags &= ~GTF_RELOP_JMP_USED;
                }

                last->gtStmt.gtStmtExpr = fgMorphTree(op1);
            }
        }

        /* Mark block as a BBJ_THROW block */
        fgConvertBBToThrowBB(block);
    }

    noway_assert(fgExpandInline == false);

#if FEATURE_FASTTAILCALL
    // A block ending in a recursive tail call that can be turned into a loop is rewritten here.
    GenTreePtr recursiveTailCall = nullptr;
    if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
    {
        fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
    }
#endif

#ifdef DEBUG
    // Poison compCurBB in DEBUG builds now that this block is done.
    compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
#endif

    // Reset this back so that it doesn't leak out impacting other blocks
    fgRemoveRestOfBlock = false;
}

/*****************************************************************************
 *
 *  Morph the blocks of the method.
 *  This function should be called just once.
 *
 *  The function returns nothing (it is declared void). For every block, starting at
 *  fgFirstBB and following bbNext, it:
 *    - resets the local assertion table (when local assertion prop is enabled),
 *    - morphs the block's statements via fgMorphStmts(),
 *    - when OPT_MULT_ADDSUB is enabled, tries to combine consecutive "+=" / "-="
 *      statements with constant operands on the same local,
 *    - redirects BBJ_RETURN blocks to genReturnBB when a single return block is used.
 *
 *  fgGlobalMorph is true for the duration of the walk and false afterwards.
 */

void Compiler::fgMorphBlocks()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgMorphBlocks()\n");
    }
#endif

    /* Since fgMorphTree can be called after various optimizations to re-arrange
     * the nodes we need a global flag to signal if we are during the one-pass
     * global morphing */

    fgGlobalMorph = true;

#if LOCAL_ASSERTION_PROP
    //
    // Local assertion prop is enabled if we are optimized
    //
    optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());

    if (optLocalAssertionProp)
    {
        //
        // Initialize for local assertion prop
        //
        optAssertionInit(true);
    }
#elif ASSERTION_PROP
    //
    // If LOCAL_ASSERTION_PROP is not set
    // and we have global assertion prop
    // then local assertion prop is always off
    //
    optLocalAssertionProp = false;

#endif

    /*-------------------------------------------------------------------------
     * Process all basic blocks in the function
     */

    BasicBlock* block = fgFirstBB;
    noway_assert(block);

#ifdef DEBUG
    compCurStmtNum = 0;
#endif

    do
    {
        // Per-block flags filled in by fgMorphStmts().
#if OPT_MULT_ADDSUB
        bool mult = false;
#endif

#if OPT_BOOL_OPS
        bool lnot = false;
#endif

        bool loadw = false;

#ifdef DEBUG
        if (verbose)
        {
            printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
        }
#endif

#if LOCAL_ASSERTION_PROP
        if (optLocalAssertionProp)
        {
            //
            // Clear out any currently recorded assertion candidates
            // before processing each basic block,
            // also we must  handle QMARK-COLON specially
            //
            optAssertionReset(0);
        }
#endif

        /* Process all statement trees in the basic block */

        GenTreePtr tree;

        fgMorphStmts(block, &mult, &lnot, &loadw);

#if OPT_MULT_ADDSUB

        // 'mult' means fgMorphStmts() saw two identical +=/-= operators in a row;
        // the combining pass only runs for optimized, tree-transforming compiles.
        if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
        {
            for (tree = block->bbTreeList; tree; tree = tree->gtNext)
            {
                noway_assert(tree->gtOper == GT_STMT);
                GenTreePtr last = tree->gtStmt.gtStmtExpr;

                if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
                {
                    GenTreePtr temp;
                    GenTreePtr next;

                    // 'last' is "dst1 op= src1"
                    GenTreePtr dst1 = last->gtOp.gtOp1;
                    GenTreePtr src1 = last->gtOp.gtOp2;

                    // NOTE(review): 'last' was just checked to be GT_ASG_ADD / GT_ASG_SUB, so this
                    // test on 'last' itself looks like it always bails out, leaving the folding
                    // below unreachable - confirm whether a different node was intended.
                    if (!last->IsCnsIntOrI())
                    {
                        goto NOT_CAFFE;
                    }

                    if (dst1->gtOper != GT_LCL_VAR)
                    {
                        goto NOT_CAFFE;
                    }
                    if (!src1->IsCnsIntOrI())
                    {
                        goto NOT_CAFFE;
                    }

                    // Keep absorbing following statements into 'last' until one does not match;
                    // every mismatch leaves through NOT_CAFFE.
                    for (;;)
                    {
                        GenTreePtr dst2;
                        GenTreePtr src2;

                        /* Look at the next statement */

                        temp = tree->gtNext;
                        if (!temp)
                        {
                            goto NOT_CAFFE;
                        }

                        noway_assert(temp->gtOper == GT_STMT);
                        next = temp->gtStmt.gtStmtExpr;

                        // The next statement must use the same operator and type ...
                        if (next->gtOper != last->gtOper)
                        {
                            goto NOT_CAFFE;
                        }
                        if (next->gtType != last->gtType)
                        {
                            goto NOT_CAFFE;
                        }

                        dst2 = next->gtOp.gtOp1;
                        src2 = next->gtOp.gtOp2;

                        // ... target the same local variable ...
                        if (dst2->gtOper != GT_LCL_VAR)
                        {
                            goto NOT_CAFFE;
                        }
                        if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
                        {
                            goto NOT_CAFFE;
                        }

                        // ... with a constant operand ...
                        if (!src2->IsCnsIntOrI())
                        {
                            goto NOT_CAFFE;
                        }

                        // ... and agree on overflow checking.
                        if (last->gtOverflow() != next->gtOverflow())
                        {
                            goto NOT_CAFFE;
                        }

                        const ssize_t i1    = src1->gtIntCon.gtIconVal;
                        const ssize_t i2    = src2->gtIntCon.gtIconVal;
                        const ssize_t itemp = i1 + i2;

                        /* if the operators are checking for overflow, check for overflow of the operands */

                        // The check is done in the width/signedness selected by the node's
                        // type (TYP_LONG or not) and its GTF_UNSIGNED flag.
                        if (next->gtOverflow())
                        {
                            if (next->TypeGet() == TYP_LONG)
                            {
                                if (next->gtFlags & GTF_UNSIGNED)
                                {
                                    ClrSafeInt<UINT64> si1(i1);
                                    if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
                                    {
                                        goto NOT_CAFFE;
                                    }
                                }
                                else
                                {
                                    ClrSafeInt<INT64> si1(i1);
                                    if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
                                    {
                                        goto NOT_CAFFE;
                                    }
                                }
                            }
                            else if (next->gtFlags & GTF_UNSIGNED)
                            {
                                ClrSafeInt<UINT32> si1(i1);
                                if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
                                {
                                    goto NOT_CAFFE;
                                }
                            }
                            else
                            {
                                ClrSafeInt<INT32> si1(i1);
                                if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
                                {
                                    goto NOT_CAFFE;
                                }
                            }
                        }

                        /* Fold the two increments/decrements into one */

                        src1->gtIntCon.gtIconVal = itemp;
#ifdef _TARGET_64BIT_
                        if (src1->gtType == TYP_INT)
                        {
                            src1->AsIntCon()->TruncateOrSignExtend32();
                        }
#endif //_TARGET_64BIT_

                        /* Remove the second statement completely */

                        noway_assert(tree->gtNext == temp);
                        noway_assert(temp->gtPrev == tree);

                        if (temp->gtNext)
                        {
                            // 'temp' is in the middle of the list: splice it out.
                            noway_assert(temp->gtNext->gtPrev == temp);

                            temp->gtNext->gtPrev = tree;
                            tree->gtNext         = temp->gtNext;
                        }
                        else
                        {
                            // 'temp' was the last statement: 'tree' becomes the new last one,
                            // which the first statement's gtPrev must point at.
                            tree->gtNext = nullptr;

                            noway_assert(block->bbTreeList->gtPrev == temp);

                            block->bbTreeList->gtPrev = tree;
                        }
                    }
                }

            // Common exit for "this statement (pair) cannot be combined".
            NOT_CAFFE:;
            }
        }

#endif

        /* Are we using a single return block? */

        if (block->bbJumpKind == BBJ_RETURN)
        {
            // Blocks marked BBF_HAS_JMP and genReturnBB itself are left alone.
            if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
            {
                /* We'll jump to the genReturnBB */
                CLANG_FORMAT_COMMENT_ANCHOR;

#if !defined(_TARGET_X86_)
                if (info.compFlags & CORINFO_FLG_SYNCH)
                {
                    fgConvertSyncReturnToLeave(block);
                }
                else
#endif // !_TARGET_X86_
                {
                    block->bbJumpKind = BBJ_ALWAYS;
                    block->bbJumpDest = genReturnBB;
                    fgReturnCount--;
                }

                // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
                // For example a method returning void could have an empty block with jump kind BBJ_RETURN.
                // Such blocks do materialize as part of in-lining.
                //
                // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
                // It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
                // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
                // is BAD_VAR_NUM.
                //
                // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.

                // 'last' is the block's final statement (or null for an empty block);
                // 'ret' is that statement's expression.
                GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
                GenTreePtr ret  = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;

                // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
                if (genReturnLocal != BAD_VAR_NUM)
                {
                    // Method must be returning a value other than TYP_VOID.
                    noway_assert(compMethodHasRetVal());

                    // This block must be ending with a GT_RETURN
                    noway_assert(last != nullptr);
                    noway_assert(last->gtOper == GT_STMT);
                    noway_assert(last->gtNext == nullptr);
                    noway_assert(ret != nullptr);

                    // GT_RETURN must have non-null operand as the method is returning the value assigned to
                    // genReturnLocal
                    noway_assert(ret->OperGet() == GT_RETURN);
                    noway_assert(ret->gtGetOp1() != nullptr);

                    GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());

                    // A block copy assignment needs to go through fgMorphCopyBlock().
                    last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;

                    // make sure that copy-prop ignores this assignment.
                    last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
                }
                else if (ret != nullptr && ret->OperGet() == GT_RETURN)
                {
                    // This block ends with a GT_RETURN
                    noway_assert(last != nullptr);
                    noway_assert(last->gtOper == GT_STMT);
                    noway_assert(last->gtNext == nullptr);

                    // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
                    noway_assert(ret->TypeGet() == TYP_VOID);
                    noway_assert(ret->gtGetOp1() == nullptr);

                    fgRemoveStmt(block, last);
                }

#ifdef DEBUG
                if (verbose)
                {
                    printf("morph BB%02u to point at onereturn.  New block is\n", block->bbNum);
                    fgTableDispBasicBlock(block);
                }
#endif
            }
        }

        block = block->bbNext;
    } while (block);

    /* We are done with the global morphing phase */

    fgGlobalMorph = false;

#ifdef DEBUG
    if (verboseTrees)
    {
        fgDispBasicBlocks(true);
    }
#endif
}

//------------------------------------------------------------------------
// fgCheckArgCnt: Adjust codegen requirements based on the maximum pushed-argument count
//
// Notes:
//    fgPtrArgCntMax records the maximum number of pushed arguments.
//    Two independent decisions are derived from it:
//      - when compCanEncodePtrArgCntMax() reports that the count cannot be encoded,
//        the method is made partially interruptible (genInterruptible = false);
//      - when fgPtrArgCntMax >= sizeof(unsigned), a frame pointer (EBP frame) is required.
//    This logic lives outside fgSetOptions() because the Rationalizer can create new calls.
//
// Assumptions:
//    Must run before isFramePointerRequired() is called: that value is a phased
//    variable, i.e. it may only be written before it has been read.
//
void Compiler::fgCheckArgCnt()
{
    const bool canEncodeArgCnt = compCanEncodePtrArgCntMax();
    if (!canEncodeArgCnt)
    {
#ifdef DEBUG
        if (verbose)
        {
            printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
                   "interruptible\n");
        }
#endif
        genInterruptible = false;
    }

    const bool needsFramePointer = (fgPtrArgCntMax >= sizeof(unsigned));
    if (needsFramePointer)
    {
#ifdef DEBUG
        if (verbose)
        {
            printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
        }
#endif
        codeGen->setFramePointerRequired(true);
    }
}

/*****************************************************************************
 *
 *  Make some decisions about the kind of code to generate.
 *
 *  Specifically, this decides:
 *    - whether the method is fully interruptible (genInterruptible), and
 *    - whether a frame pointer is required, and why (the plain, EH and
 *      GCInfo flavors of codeGen->setFramePointerRequired*).
 *
 *  The statements are order-sensitive: genInterruptible is written in several
 *  places, and fgCheckArgCnt() (called in the middle) may clear it while the
 *  security-check case further down may set it again.
 */

void Compiler::fgSetOptions()
{
#ifdef DEBUG
    /* Should we force fully interruptible code ? */
    // Forced either by configuration or, under stress, for a subset of methods.
    if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
    {
        noway_assert(!codeGen->isGCTypeFixed());
        genInterruptible = true;
    }
#endif

    if (opts.compDbgCode)
    {
        assert(!codeGen->isGCTypeFixed());
        genInterruptible = true; // debugging is easier this way ...
    }

    /* Assume we won't need an explicit stack frame if this is allowed */

    // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
    // the callee-saved registers.
    noway_assert(!compTailCallUsed || !compLocallocUsed);

    // A method that uses localloc always gets a frame pointer.
    if (compLocallocUsed)
    {
        codeGen->setFramePointerRequired(true);
    }

#ifdef _TARGET_X86_

    // On x86 a method that uses the tail call helper also gets a frame pointer.
    if (compTailCallUsed)
        codeGen->setFramePointerRequired(true);

#endif // _TARGET_X86_

    // Frame pointer omission has been disabled for this compilation.
    if (!opts.genFPopt)
    {
        codeGen->setFramePointerRequired(true);
    }

    // Assert that the EH table has been initialized by now. Note that
    // compHndBBtabAllocCount never decreases; it is a high-water mark
    // of table allocation. In contrast, compHndBBtabCount does shrink
    // if we delete a dead EH region, and if it shrinks to zero, the
    // table pointer compHndBBtab is unreliable.
    assert(compHndBBtabAllocCount >= info.compXcptnsCount);

#ifdef _TARGET_X86_

    // Note: this case, and the !X86 case below, should both use the
    // !X86 path. This would require a few more changes for X86 to use
    // compHndBBtabCount (the current number of EH clauses) instead of
    // info.compXcptnsCount (the number of EH clauses in IL), such as
    // in ehNeedsShadowSPslots(). This is because sometimes the IL has
    // an EH clause that we delete as statically dead code before we
    // get here, leaving no EH clauses left, and thus no requirement
    // to use a frame pointer because of EH. But until all the code uses
    // the same test, leave info.compXcptnsCount here.
    if (info.compXcptnsCount > 0)
    {
        codeGen->setFramePointerRequiredEH(true);
    }

#else // !_TARGET_X86_

    // Use the current (post dead-code removal) number of EH clauses.
    if (compHndBBtabCount > 0)
    {
        codeGen->setFramePointerRequiredEH(true);
    }

#endif // _TARGET_X86_

#ifdef UNIX_X86_ABI
    if (info.compXcptnsCount > 0)
    {
        assert(!codeGen->isGCTypeFixed());
        // Enforce fully interruptible codegen for funclet unwinding
        genInterruptible = true;
    }
#endif // UNIX_X86_ABI

    // May clear genInterruptible and/or require a frame pointer, depending on the
    // maximum number of pushed arguments. Anything below that sets genInterruptible
    // overrides the result of this call.
    fgCheckArgCnt();

    if (info.compCallUnmanaged)
    {
        codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
    }

    if (info.compPublishStubParam)
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (opts.compNeedSecurityCheck)
    {
        codeGen->setFramePointerRequiredGCInfo(true);

#ifndef JIT32_GCENCODER

        // The decoder only reports objects in frames with exceptions if the frame
        // is fully interruptible.
        // Even if there is no catch or other way to resume execution in this frame
        // the VM requires the security object to remain alive until later, so
        // Frames with security objects must be fully interruptible.
        genInterruptible = true;

#endif // JIT32_GCENCODER
    }

    if (compIsProfilerHookNeeded())
    {
        codeGen->setFramePointerRequired(true);
    }

    if (info.compIsVarArgs)
    {
        // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    if (lvaReportParamTypeArg())
    {
        codeGen->setFramePointerRequiredGCInfo(true);
    }

    // printf("method will %s be fully interruptible\n", genInterruptible ? "   " : "not");
}

/*****************************************************************************
 *
 *  Build the tree that runs the class initialization check for the class
 *  that owns the method being compiled. Must not be used while compiling an
 *  inlinee.
 *
 *  When the owning type does not need a runtime lookup, the result is
 *  whatever fgGetSharedCCtor() produces for the class handle. Otherwise the
 *  result is a helper call whose arguments depend on where the generic
 *  context lives (this object, class parameter, or method parameter).
 */

GenTreePtr Compiler::fgInitThisClass()
{
    noway_assert(!compIsForInlining());

    CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);

    // Simple case first: the exact class is known without consulting the runtime.
    if (!kind.needsRuntimeLookup)
    {
        return fgGetSharedCCtor(info.compClassHnd);
    }

#ifdef FEATURE_READYTORUN_COMPILER
    // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE is understood by CoreRT only, so this
    // path is not taken on CoreCLR.
    if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
    {
        CORINFO_RESOLVED_TOKEN resolvedToken;
        memset(&resolvedToken, 0, sizeof(resolvedToken));

        // A shared method body does not necessarily imply a runtime lookup: a generic
        // method on a non-generic type can use the plain static base helper.
        if ((info.compClassAttr & CORINFO_FLG_SHAREDINST) == 0)
        {
            resolvedToken.hClass = info.compClassHnd;
            return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
        }

        // A runtime lookup really is required.
        GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);

        // Passing a zeroed out resolvedToken to CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE means
        // "get the static base of the class that owns the method being compiled". Since we are
        // not inlining when we get here, that is unambiguous.
        return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
                                         gtNewArgList(ctxTree), &kind);
    }
#endif

    // For collectible types, shared generic code that uses the generic context parameter must
    // report it. (This is conservative: some cases, particularly when the context parameter is
    // 'this', would not need the eager reporting logic.)
    lvaGenericsContextUseCount++;

    switch (kind.runtimeLookupKind)
    {
        case CORINFO_LOOKUP_THISOBJ:
        {
            // The helper takes a 'this'-derived pointer, but it also needs the static method
            // desc in order to find the right point in the hierarchy.
            GenTreePtr thisObj = gtNewLclvNode(info.compThisArg, TYP_REF);

            // Load the vtable pointer out of the object; the load can fault on a null 'this'.
            GenTreePtr vtablePtr = gtNewOperNode(GT_IND, TYP_I_IMPL, thisObj);
            vtablePtr->gtFlags |= GTF_EXCEPT;

            GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);

            return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0, gtNewArgList(vtablePtr, methodHnd));
        }

        case CORINFO_LOOKUP_CLASSPARAM:
        {
            // The type context argument is passed straight to the helper.
            GenTreePtr classCtx = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
            return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(classCtx));
        }

        case CORINFO_LOOKUP_METHODPARAM:
        {
            // No object is available here: pass zero in the first slot and the type context
            // argument in the second.
            GenTreePtr methodCtx = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
            return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
                                       gtNewArgList(gtNewIconNode(0), methodCtx));
        }
    }

    // Any lookup kind not handled by the switch above ends up here.
    noway_assert(!"Unknown LOOKUP_KIND");
    UNREACHABLE();
}

#ifdef DEBUG
/*****************************************************************************
 *
 *  Tree walk callback that checks for stray GT_QMARK nodes. Every GT_QMARK
 *  that is visited is passed to fgCheckQmarkAllowedForm(), which asserts
 *  unless the node has a permitted shape (the "? 1 : 0" pattern).
 *
 *  tree - address of the pointer to the node being visited
 *  data - walk state; not used by this callback
 *
 *  Always returns WALK_CONTINUE.
 */
Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
{
    GenTreePtr node = *tree;

    // Nothing to verify for anything other than a qmark.
    if (node->OperGet() != GT_QMARK)
    {
        return WALK_CONTINUE;
    }

    fgCheckQmarkAllowedForm(node);
    return WALK_CONTINUE;
}

// Asserts that 'tree' (which must be a GT_QMARK) has a shape that is permitted
// to remain in the IR. Without LEGACY_BACKEND no qmark is permitted at all, so
// this asserts unconditionally.
void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
{
    assert(tree->OperGet() == GT_QMARK);
#ifdef LEGACY_BACKEND
    // Only one form is tolerated: the first operand of the colon must be the
    // constant 0 and the second operand the constant 1.
    GenTreePtr colonNode = tree->gtOp.gtOp2;

    assert(colonNode->gtOp.gtOp1->IsIntegralConst(0));
    assert(colonNode->gtOp.gtOp2->IsIntegralConst(1));
#else  // !LEGACY_BACKEND
    assert(!"Qmarks beyond morph disallowed.");
#endif // LEGACY_BACKEND
}

/*****************************************************************************
 *
 *  Check that the GT_QMARK nodes in "expr" appear only in positions that the
 *  qmark expansion can handle. The accepted shapes are:
 *
 *  1. A top level qmark, i.e. one of:
 *      a) (bool) ? (void) : (void)
 *      b) V0N = (bool) ? (type) : (type)
 *
 *  2. Qmarks nested at the top level of either arm of the colon of such a
 *     qmark (checked recursively). A qmark may not be the child of any other
 *     operator, and may not appear inside the condition.
 */
void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
{
    GenTreePtr topQmark = fgGetTopLevelQmark(expr);

    // Without a top level qmark, the whole tree must be qmark free.
    if (topQmark == nullptr)
    {
        fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
        return;
    }

    // Expanding qmarks inside the condition would probably be possible, but it is not
    // considered worth the effort; simply insist that the condition contains none.
    fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);

    // Each arm of the colon may itself be (or assign) a top level qmark.
    GenTreePtr colonNode = topQmark->gtOp.gtOp2;
    fgPreExpandQmarkChecks(colonNode->gtOp.gtOp1);
    fgPreExpandQmarkChecks(colonNode->gtOp.gtOp2);
}
#endif // DEBUG

/*****************************************************************************
 *
 *  Find the top level GT_QMARK node of "expr".
 *
 *  Two shapes are recognized:
 *    - "expr" is itself a GT_QMARK, or
 *    - "expr" is a GT_ASG whose source is a GT_QMARK and whose destination is
 *      a GT_LCL_VAR.
 *
 *  Returns the qmark node, or nullptr when "expr" has neither shape.
 *  If "ppDst" is supplied, it receives the GT_LCL_VAR destination in the
 *  assignment case and nullptr in every other case.
 *
 */
GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
{
    const bool wantDst = (ppDst != nullptr);

    // Start from "no destination"; only the assignment form overrides this.
    if (wantDst)
    {
        *ppDst = nullptr;
    }

    // A bare qmark is its own top level qmark.
    if (expr->gtOper == GT_QMARK)
    {
        return expr;
    }

    if (expr->gtOper != GT_ASG)
    {
        return nullptr;
    }

    GenTreePtr asgDst = expr->gtOp.gtOp1;
    GenTreePtr asgSrc = expr->gtOp.gtOp2;

    // The source is examined first; the destination is only inspected for a qmark source.
    if ((asgSrc->gtOper != GT_QMARK) || (asgDst->gtOper != GT_LCL_VAR))
    {
        return nullptr;
    }

    if (wantDst)
    {
        *ppDst = asgDst;
    }
    return asgSrc;
}

/*********************************************************************************
 *
 *  For a castclass helper call,
 *  Importer creates the following tree:
 *      tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
 *
 *  This method splits the qmark expression created by the importer into the
 *  following blocks: (block, asg, cond1, cond2, helper, remainder)
 *  Notice that op1 is the result for both the conditions. So we coalesce these
 *  assignments into a single block instead of two blocks resulting a nested diamond.
 *
 *                       +---------->-----------+
 *                       |          |           |
 *                       ^          ^           v
 *                       |          |           |
 *  block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
 *
 *  We expect to achieve the following codegen:
 *     mov      rsi, rdx                           tmp = op1                  // asgBlock
 *     test     rsi, rsi                           goto skip if tmp == null ? // cond1Block
 *     je       SKIP
 *     mov      rcx, 0x76543210                    cns = op2                  // cond2Block
 *     cmp      qword ptr [rsi], rcx               goto skip if *tmp == op2
 *     je       SKIP
 *     call     CORINFO_HELP_CHKCASTCLASS_SPECIAL  tmp = helper(cns, tmp)     // helperBlock
 *     mov      rsi, rax
 *  SKIP:                                                                     // remainderBlock
 *     tmp has the result.
 *
 *  Arguments:
 *     block - the basic block that contains "stmt"; it is split after "stmt"
 *     stmt  - the statement whose expression is "lclVar = qmark", where the qmark
 *             carries GTF_QMARK_CAST_INSTOF; it is removed from "block" at the end
 *
 */
void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
        fgDispBasicBlocks(block, block, true);
    }
#endif // DEBUG

    GenTreePtr expr = stmt->gtStmt.gtStmtExpr;

    // This form of qmark must always be assigned to a local, so a destination is mandatory.
    GenTreePtr dst   = nullptr;
    GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
    noway_assert(dst != nullptr);

    assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);

    // Get cond, true, false exprs for the qmark.
    GenTreePtr condExpr  = qmark->gtGetOp1();
    GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    // Get cond, true, false exprs for the nested qmark.
    GenTreePtr nestedQmark = falseExpr;
    GenTreePtr cond2Expr;
    GenTreePtr true2Expr;
    GenTreePtr false2Expr;

    if (nestedQmark->gtOper == GT_QMARK)
    {
        cond2Expr  = nestedQmark->gtGetOp1();
        true2Expr  = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
        false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();

        // The nested condition is about to become the operand of a JTRUE, so it is no
        // longer a qmark condition.
        assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
        cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
    }
    else
    {
        // This is a rare case that arises when we are doing minopts and encounter isinst of null.
        // gtFoldExpr was still able to optimize away part of the tree (but not all).
        // That means it does not match our pattern.

        // Rather than write code to handle this case, just fake up some nodes to make it match the common
        // case.  Synthesize a comparison that is always true, and for the result-on-true, use the
        // entire subtree we expected to be the nested question op.

        cond2Expr  = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
        true2Expr  = nestedQmark;
        false2Expr = gtNewIconNode(0, TYP_I_IMPL);
    }
    // Only trueExpr and true2Expr are assigned to the destination below; false2Expr is
    // never emitted, so it is only sanity checked against trueExpr here.
    assert(false2Expr->OperGet() == trueExpr->OperGet());

    // Clear flags as they are now going to be part of JTRUE.
    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    // Create the chain of blocks. See method header comment.
    // The order of blocks after this is the following:
    //     block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    // Every new block is created "after block", so they are created in the reverse of the
    // final order listed above.
    BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
    BasicBlock* cond2Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* cond1Block  = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* asgBlock    = fgNewBBafter(BBJ_NONE, block, true);

    // remainderBlock becomes the target of the two conditional jumps set up below.
    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        helperBlock->bbFlags &= ~BBF_INTERNAL;
        cond2Block->bbFlags &= ~BBF_INTERNAL;
        cond1Block->bbFlags &= ~BBF_INTERNAL;
        asgBlock->bbFlags &= ~BBF_INTERNAL;
        helperBlock->bbFlags |= BBF_IMPORTED;
        cond2Block->bbFlags |= BBF_IMPORTED;
        cond1Block->bbFlags |= BBF_IMPORTED;
        asgBlock->bbFlags |= BBF_IMPORTED;
    }

    // Chain the flow correctly.
    // Fall-through edges: block -> asg -> cond1 -> cond2 -> helper -> remainder.
    // Jump edges: cond1 -> remainder and cond2 -> remainder.
    fgAddRefPred(asgBlock, block);
    fgAddRefPred(cond1Block, asgBlock);
    fgAddRefPred(cond2Block, cond1Block);
    fgAddRefPred(helperBlock, cond2Block);
    fgAddRefPred(remainderBlock, helperBlock);
    fgAddRefPred(remainderBlock, cond1Block);
    fgAddRefPred(remainderBlock, cond2Block);

    cond1Block->bbJumpDest = remainderBlock;
    cond2Block->bbJumpDest = remainderBlock;

    // Set the weights; some are guesses.
    asgBlock->inheritWeight(block);
    cond1Block->inheritWeight(block);
    cond2Block->inheritWeightPercentage(cond1Block, 50);
    helperBlock->inheritWeightPercentage(cond2Block, 50);

    // Append cond1 as JTRUE to cond1Block
    // (cond1 is used as is: the jump to remainderBlock is taken when it holds.)
    GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
    GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(cond1Block, jmpStmt);

    // Append cond2 as JTRUE to cond2Block
    // (cond2Expr is reversed further down, after it has been attached to this JTRUE.)
    jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
    jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(cond2Block, jmpStmt);

    // AsgBlock should get tmp = op1 assignment.
    trueExpr            = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
    GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(asgBlock, trueStmt);

    // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
    gtReverseCond(cond2Expr);
    GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
    GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(helperBlock, helperStmt);

    // Finally remove the nested qmark stmt.
    fgRemoveStmt(block, stmt);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
        fgDispBasicBlocks(block, remainderBlock, true);
    }
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Expand a statement with a top level qmark node. There are three cases, based
 *  on whether the qmark has both "true" and "false" arms, or just one of them.
 *
 *     S0;
 *     C ? T : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                       bbj_always
 *                       +---->------+
 *                 false |           |
 *     S0 -->-- ~C -->-- T   F -->-- S1
 *              |            |
 *              +--->--------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? T : NOP;
 *     S1;
 *
 *     Generates ===>
 *
 *                 false
 *     S0 -->-- ~C -->-- T -->-- S1
 *              |                |
 *              +-->-------------+
 *              bbj_cond(true)
 *
 *     -----------------------------------------
 *
 *     S0;
 *     C ? NOP : F;
 *     S1;
 *
 *     Generates ===>
 *
 *                false
 *     S0 -->-- C -->-- F -->-- S1
 *              |               |
 *              +-->------------+
 *              bbj_cond(true)
 *
 *  If the qmark assigns to a variable, then create tmps for "then"
 *  and "else" results and assign the temp to the variable as a writeback step.
 *
 *  Arguments:
 *     block - the basic block that contains "stmt"; it is split after "stmt"
 *     stmt  - the statement to expand. Nothing is done if its expression has no
 *             top level qmark. Qmarks flagged GTF_QMARK_CAST_INSTOF are handed
 *             to fgExpandQmarkForCastInstOf() instead.
 */
void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
{
    GenTreePtr expr = stmt->gtStmt.gtStmtExpr;

    // Retrieve the Qmark node to be expanded.
    GenTreePtr dst   = nullptr;
    GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
    if (qmark == nullptr)
    {
        return;
    }

    // Cast/isinst qmarks have their own, specialized expansion.
    if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
    {
        fgExpandQmarkForCastInstOf(block, stmt);
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
        fgDispBasicBlocks(block, block, true);
    }
#endif // DEBUG

    // Retrieve the operands.
    GenTreePtr condExpr  = qmark->gtGetOp1();
    GenTreePtr trueExpr  = qmark->gtGetOp2()->AsColon()->ThenNode();
    GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();

    // The condition is about to become the operand of a JTRUE, so it is no longer a qmark condition.
    assert(condExpr->gtFlags & GTF_RELOP_QMARK);
    condExpr->gtFlags &= ~GTF_RELOP_QMARK;

    assert(!varTypeIsFloating(condExpr->TypeGet()));

    // An arm that is a GT_NOP counts as "absent".
    bool hasTrueExpr  = (trueExpr->OperGet() != GT_NOP);
    bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
    assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!

    // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
    //     block ... condBlock ... elseBlock ... remainderBlock
    //
    // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
    // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
    // remainderBlock will still be GC safe.
    unsigned    propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
    BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
    fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.

    BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
    BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);

    // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
    // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
    if ((block->bbFlags & BBF_INTERNAL) == 0)
    {
        condBlock->bbFlags &= ~BBF_INTERNAL;
        elseBlock->bbFlags &= ~BBF_INTERNAL;
        condBlock->bbFlags |= BBF_IMPORTED;
        elseBlock->bbFlags |= BBF_IMPORTED;
    }

    // In each of the three cases below at least one jump targets remainderBlock.
    remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;

    condBlock->inheritWeight(block);

    // Edges common to all three cases: block -> cond -> else -> remainder.
    fgAddRefPred(condBlock, block);
    fgAddRefPred(elseBlock, condBlock);
    fgAddRefPred(remainderBlock, elseBlock);

    BasicBlock* thenBlock = nullptr;
    if (hasTrueExpr && hasFalseExpr)
    {
        //                       bbj_always
        //                       +---->------+
        //                 false |           |
        //     S0 -->-- ~C -->-- T   F -->-- S1
        //              |            |
        //              +--->--------+
        //              bbj_cond(true)
        //
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = elseBlock;

        // The "then" block is placed between condBlock and elseBlock and jumps over elseBlock.
        thenBlock             = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
        thenBlock->bbJumpDest = remainderBlock;
        if ((block->bbFlags & BBF_INTERNAL) == 0)
        {
            thenBlock->bbFlags &= ~BBF_INTERNAL;
            thenBlock->bbFlags |= BBF_IMPORTED;
        }

        elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);

        fgAddRefPred(thenBlock, condBlock);
        fgAddRefPred(remainderBlock, thenBlock);

        thenBlock->inheritWeightPercentage(condBlock, 50);
        elseBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasTrueExpr)
    {
        //                 false
        //     S0 -->-- ~C -->-- T -->-- S1
        //              |                |
        //              +-->-------------+
        //              bbj_cond(true)
        //
        gtReverseCond(condExpr);
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);
        // Since we have no false expr, use the one we'd already created.
        thenBlock = elseBlock;
        elseBlock = nullptr;

        thenBlock->inheritWeightPercentage(condBlock, 50);
    }
    else if (hasFalseExpr)
    {
        //                false
        //     S0 -->-- C -->-- F -->-- S1
        //              |               |
        //              +-->------------+
        //              bbj_cond(true)
        //
        // The condition is not reversed here: when it holds, the "else" block is jumped over.
        condBlock->bbJumpDest = remainderBlock;
        fgAddRefPred(remainderBlock, condBlock);

        elseBlock->inheritWeightPercentage(condBlock, 50);
    }

    // qmark->gtGetOp1() is the node that condExpr was read from above.
    GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
    GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
    fgInsertStmtAtEnd(condBlock, jmpStmt);

    // Remove the original qmark statement.
    // ('stmt' itself is still read below for its IL offset.)
    fgRemoveStmt(block, stmt);

    // Since we have top level qmarks, we either have a dst for it in which case
    // we need to create tmps for true and falseExprs, else just don't bother
    // assigning.
    unsigned lclNum = BAD_VAR_NUM;
    if (dst != nullptr)
    {
        assert(dst->gtOper == GT_LCL_VAR);
        lclNum = dst->gtLclVar.gtLclNum;
    }
    else
    {
        assert(qmark->TypeGet() == TYP_VOID);
    }

    // Assign the trueExpr into the dst or tmp, insert in thenBlock
    if (hasTrueExpr)
    {
        if (dst != nullptr)
        {
            trueExpr = gtNewTempAssign(lclNum, trueExpr);
        }
        GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
        fgInsertStmtAtEnd(thenBlock, trueStmt);
    }

    // Assign the falseExpr into the dst or tmp, insert in elseBlock
    if (hasFalseExpr)
    {
        if (dst != nullptr)
        {
            falseExpr = gtNewTempAssign(lclNum, falseExpr);
        }
        GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
        fgInsertStmtAtEnd(elseBlock, falseStmt);
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
        fgDispBasicBlocks(block, remainderBlock, true);
    }
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Expand GT_QMARK nodes from the flow graph into basic blocks.
 *
 */

void Compiler::fgExpandQmarkNodes()
{
    if (compQmarkUsed)
    {
        for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
        {
            for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
            {
                GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
#ifdef DEBUG
                fgPreExpandQmarkChecks(expr);
#endif
                fgExpandQmarkStmt(block, stmt);
            }
        }
#ifdef DEBUG
        fgPostExpandQmarkChecks();
#endif
    }
    compQmarkRationalized = true;
}

#ifdef DEBUG
/*****************************************************************************
 *
 *  Make sure we don't have any more GT_QMARK nodes.
 *
 */
void Compiler::fgPostExpandQmarkChecks()
{
    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
    {
        for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
            fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
        }
    }
}
#endif

/*****************************************************************************
 *
 *  Transform all basic blocks for codegen.
 *
 *  This is the driver for the morph related phases. In order, it:
 *    - inserts the class constructor call (if requested) and, in DEBUG builds,
 *      optional GC-pointer argument checks and ESP check temps,
 *    - removes unimported blocks,
 *    - inlines,
 *    - adds internal blocks/trees,
 *    - runs the empty-try / empty-finally / merge-finally-chains / clone-finally
 *      transformations,
 *    - marks implicit byref args, promotes structs and marks address exposed locals,
 *    - retypes implicit byref args,
 *    - morphs the trees of all blocks,
 *    - decides the code generation options, and
 *    - expands GT_QMARK nodes into control flow.
 *
 *  Must not be called on an inlinee compiler.
 */

void Compiler::fgMorph()
{
    noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.

    fgOutgoingArgTemps = nullptr;

#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In fgMorph()\n");
    }
    if (verboseTrees)
    {
        fgDispBasicBlocks(true);
    }
#endif // DEBUG

    // Insert call to class constructor as the first basic block if
    // we were asked to do so.
    if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
                                    impTokenLookupContextHandle /* context */) &
        CORINFO_INITCLASS_USE_HELPER)
    {
        fgEnsureFirstBBisScratch();
        fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
    }

#ifdef DEBUG
    if (opts.compGcChecks)
    {
        // Only the first info.compArgsCount entries of lvaTable are examined here.
        for (unsigned i = 0; i < info.compArgsCount; i++)
        {
            if (lvaTable[i].TypeGet() == TYP_REF)
            {
                // confirm that the argument is a GC pointer (for debugging (GC stress))
                GenTreePtr      op   = gtNewLclvNode(i, TYP_REF);
                GenTreeArgList* args = gtNewArgList(op);
                op                   = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);

                fgEnsureFirstBBisScratch();
                fgInsertStmtAtEnd(fgFirstBB, op);
            }
        }
    }

    // Allocate the temp used for the stack pointer check on return.
    if (opts.compStackCheckOnRet)
    {
        lvaReturnEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
        lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
    }

    // Allocate the temp used for the stack pointer check around calls.
    if (opts.compStackCheckOnCall)
    {
        lvaCallEspCheck                  = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
        lvaTable[lvaCallEspCheck].lvType = TYP_INT;
    }
#endif // DEBUG

    /* Filter out unimported BBs */

    fgRemoveEmptyBlocks();

#ifdef DEBUG
    /* Blocks were just removed. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);
#endif // DEBUG

    EndPhase(PHASE_MORPH_INIT);

    /* Inline */
    fgInline();
#if 0
    JITDUMP("trees after inlining\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));
#endif

    RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.

    EndPhase(PHASE_MORPH_INLINE);

    /* Add any internal blocks/trees we may need */

    fgAddInternal();

#if OPT_BOOL_OPS
    fgMultipleNots = false;
#endif

#ifdef DEBUG
    /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
    fgDebugCheckBBlist(false, false);
#endif // DEBUG

    /* Simplify the EH structure: each transformation below is its own phase */

    fgRemoveEmptyTry();

    EndPhase(PHASE_EMPTY_TRY);

    fgRemoveEmptyFinally();

    EndPhase(PHASE_EMPTY_FINALLY);

    fgMergeFinallyChains();

    EndPhase(PHASE_MERGE_FINALLY_CHAINS);

    fgCloneFinally();

    EndPhase(PHASE_CLONE_FINALLY);

    fgUpdateFinallyTargetFlags();

    /* For x64 and ARM64 we need to mark irregular parameters */
    fgMarkImplicitByRefArgs();

    /* Promote struct locals if necessary */
    fgPromoteStructs();

    /* Now it is the time to figure out what locals have address-taken. */
    fgMarkAddressExposedLocals();

    EndPhase(PHASE_STR_ADRLCL);

    /* Apply the type update to implicit byref parameters; also choose (based on address-exposed
       analysis) which implicit byref promotions to keep (requires copy to initialize) or discard. */
    fgRetypeImplicitByRefArgs();

#ifdef DEBUG
    /* Now that locals have address-taken and implicit byref marked, we can safely apply stress. */
    lvaStressLclFld();
    fgStress64RsltMul();
#endif // DEBUG

    EndPhase(PHASE_MORPH_IMPBYREF);

    /* Morph the trees in all the blocks of the method */

    fgMorphBlocks();

    /* Fix any LclVar annotations on discarded struct promotion temps for implicit by-ref args */
    fgMarkDemotedImplicitByRefArgs();

    EndPhase(PHASE_MORPH_GLOBAL);

#if 0
    JITDUMP("trees after fgMorphBlocks\n");
    DBEXEC(VERBOSE, fgDispBasicBlocks(true));
#endif

    /* Decide the kind of code we want to generate */

    fgSetOptions();

    /* Turn the remaining top level qmarks into explicit control flow */

    fgExpandQmarkNodes();

#ifdef DEBUG
    compCurBB = nullptr;
#endif // DEBUG
}

/*****************************************************************************
 *
 *  Promoting struct locals
 */
void Compiler::fgPromoteStructs()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("*************** In fgPromoteStructs()\n");
    }
#endif // DEBUG

    if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
    {
        return;
    }

    if (fgNoStructPromotion)
    {
        return;
    }

#if 0
    // The code in this #if has been useful in debugging struct promotion issues, by
    // enabling selective enablement of the struct promotion optimization according to
    // method hash.
#ifdef DEBUG
    unsigned methHash = info.compMethodHash();
    char* lostr = getenv("structpromohashlo");
    unsigned methHashLo = 0;
    if (lostr != NULL)
    {
        sscanf_s(lostr, "%x", &methHashLo);
    }
    char* histr = getenv("structpromohashhi");
    unsigned methHashHi = UINT32_MAX;
    if (histr != NULL)
    {
        sscanf_s(histr, "%x", &methHashHi);
    }
    if (methHash < methHashLo || methHash > methHashHi)
    {
        return;
    }
    else
    {
        printf("Promoting structs for method %s, hash = 0x%x.\n",
               info.compFullName, info.compMethodHash());
        printf("");         // in our logic this causes a flush
    }
#endif // DEBUG
#endif // 0

    if (info.compIsVarArgs)
    {
        return;
    }

    if (getNeedsGSSecurityCookie())
    {
        return;
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nlvaTable before fgPromoteStructs\n");
        lvaTableDump();
    }
#endif // DEBUG

    // The lvaTable might grow as we grab temps. Make a local copy here.
    unsigned startLvaCount = lvaCount;

    //
    // Loop through the original lvaTable. Looking for struct locals to be promoted.
    //
    lvaStructPromotionInfo structPromotionInfo;
    bool                   tooManyLocals = false;

    for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
    {
        // Whether this var got promoted
        bool       promotedVar = false;
        LclVarDsc* varDsc      = &lvaTable[lclNum];

        // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
        // its fields.  Instead, we will attempt to enregister the entire struct.
        if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
        {
            varDsc->lvRegStruct = true;
        }
        // Don't promote if we have reached the tracking limit.
        else if (lvaHaveManyLocals())
        {
            // Print the message first time when we detected this condition
            if (!tooManyLocals)
            {
                JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
            }
            tooManyLocals = true;
        }
        else if (varTypeIsStruct(varDsc))
        {
            bool shouldPromote;

            lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
            if (structPromotionInfo.canPromote)
            {
                shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
            }
            else
            {
                shouldPromote = false;
            }

#if 0
            // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
            // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
            static int structPromoVarNum = 0;
            structPromoVarNum++;
            if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
#endif // 0

            if (shouldPromote)
            {
                // Promote the this struct local var.
                lvaPromoteStructVar(lclNum, &structPromotionInfo);
                promotedVar = true;

#ifdef _TARGET_ARM_
                if (structPromotionInfo.requiresScratchVar)
                {
                    // Ensure that the scratch variable is allocated, in case we
                    // pass a promoted struct as an argument.
                    if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
                    {
                        lvaPromotedStructAssemblyScratchVar =
                            lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
                        lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
                    }
                }
#endif // _TARGET_ARM_
            }
        }

        if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
        {
            // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
            // we will treat it as a reg struct.
            varDsc->lvRegStruct = true;
        }
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nlvaTable after fgPromoteStructs\n");
        lvaTableDump();
    }
#endif // DEBUG
}

//------------------------------------------------------------------------
// fgMorphStructField: Try to replace a GT_FIELD whose object is ADDR(LCL_VAR)
//                     with a direct GT_LCL_VAR reference.
//
// Arguments:
//    tree      - the GT_FIELD node to examine (asserted)
//    fgWalkPre - tree-walk data; its parentStack supplies the parent of 'tree'
//
// Return Value:
//    WALK_SKIP_SUBTREES when 'tree' was rewritten in place into a GT_LCL_VAR,
//    WALK_CONTINUE when it was left untouched.
//
// Notes:
//    Two cases are rewritten: a field of a promoted struct local (the node
//    becomes the promoted field local), and a field of a "normed" struct local
//    whose type matches the field's type (the node becomes the local itself).

Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
    noway_assert(tree->OperGet() == GT_FIELD);

    // Look through FIELD(ADDR(x)) to find the object 'x', if the tree has that shape.
    GenTreePtr fldObj = tree->gtField.gtFldObj;
    GenTreePtr obj    = nullptr;
    if ((fldObj != nullptr) && (fldObj->gtOper == GT_ADDR))
    {
        obj = fldObj->gtOp.gtOp1;
    }

    const bool objIsLocal = (obj != nullptr) && (obj->gtOper == GT_LCL_VAR);
    noway_assert((tree->gtFlags & GTF_GLOB_REF) || objIsLocal);

    // Only instance fields of locals are of interest.
    if (!objIsLocal)
    {
        return WALK_CONTINUE;
    }

    unsigned   lclNum = obj->gtLclVarCommon.gtLclNum;
    LclVarDsc* varDsc = &lvaTable[lclNum];

    if (varTypeIsStruct(obj))
    {
        if (!varDsc->lvPromoted)
        {
            return WALK_CONTINUE;
        }

        // Promoted struct: find the local that stands for the accessed field.
        unsigned fieldOffs   = tree->gtField.gtFldOffset;
        unsigned fieldLclNum = lvaGetFieldLocal(varDsc, fieldOffs);
        noway_assert(fieldLclNum != BAD_VAR_NUM);

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            // Count this appearance of the promoted implicit byref.  Struct promotion runs
            // during address-exposed analysis, and fgMakeOutgoingStructArgCopy consults
            // these ref counts when deciding whether certain copies of implicit byref
            // params may be elided.  fgMarkAddrTakenLocalsPreCB (our caller) would
            // normally do the bookkeeping, but we are about to return SKIP_SUBTREES,
            // so it has to happen here.
            JITDUMP(
                "Incrementing ref count from %d to %d for V%02d in fgMorphStructField for promoted struct\n",
                varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
            varDsc->lvRefCnt++;
        }

        // Turn the GT_FIELD into a reference to the field local.
        tree->SetOper(GT_LCL_VAR);
        tree->gtLclVarCommon.SetLclNum(fieldLclNum);
        tree->gtType = lvaTable[fieldLclNum].TypeGet();
        tree->gtFlags &= GTF_NODE_MASK;
        tree->gtFlags &= ~GTF_GLOB_REF;

        GenTreePtr asgParent = fgWalkPre->parentStack->Index(1);
        if (asgParent->gtOper == GT_ASG)
        {
            if (asgParent->gtOp.gtOp1 == tree)
            {
                // The node is the destination of the assignment.
                tree->gtFlags |= (GTF_VAR_DEF | GTF_DONT_CSE);
            }

            // When a struct has a struct-typed field that itself wraps a single
            // pointer-sized scalar, promotion gives the field local the scalar's type
            // rather than promoting recursively.  A block assignment can therefore end
            // up with a scalar-typed RHS.  If assertion prop then replaced that RHS with
            // a constant, the block assignment would be misread as an init-block, so
            // mark the RHS DONT_CSE to prevent constant propagation.
            //
            // TODO - This can also be avoided if we implement recursive struct
            // promotion.
            if (varTypeIsStruct(asgParent) && (asgParent->gtOp.gtOp2 == tree) && !varTypeIsStruct(tree))
            {
                tree->gtFlags |= GTF_DONT_CSE;
            }
        }
#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing the field in promoted struct with a local var:\n");
            fgWalkPre->printModified = true;
        }
#endif // DEBUG
        return WALK_SKIP_SUBTREES;
    }

    // Normed struct
    //
    // A "normed struct" is a struct that the VM reports as a basic type.  That only happens
    // when the struct has a single element of 4 bytes (or, on x64, possibly 8 bytes).  The
    // local's type and the GT_FIELD's type are then normally the same, with one very rare
    // exception: enums.  An enum is a value type with exactly one primitive integer field
    // (named "value__" in CLS programs), the VM types a local of the enum as that primitive,
    // and IL is allowed to reach the field with ldfld/ldflda.  For example:
    //
    //  .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
    //  {
    //    .field public specialname rtspecialname int16 value__
    //    .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
    //  }
    //  .method public hidebysig static void  Main() cil managed
    //  {
    //     .locals init (valuetype mynamespace.e_t V_0)
    //     ...
    //     ldloca.s   V_0
    //     ldflda     int16 mynamespace.e_t::value__
    //     ...
    //  }
    //
    // Compilers do not normally emit the ldflda, since it is superfluous.
    //
    // In that example the local is a short, but the JIT widens every tree that uses it to
    // the "actual type", INT, while the GT_FIELD stays SHORT.  With such a mismatch we leave
    // the tree alone: the local may end up marked address taken, and the proper SHORT load
    // is then done from memory.
    if (tree->TypeGet() != obj->TypeGet())
    {
        return WALK_CONTINUE;
    }

    if (lvaIsImplicitByRefLocal(lclNum))
    {
        // Same ref-count bookkeeping as for the promoted case: count this appearance
        // now, because returning SKIP_SUBTREES keeps fgMarkAddrTakenLocalsPreCB from
        // seeing the local.
        JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField for normed struct\n",
                varDsc->lvRefCnt, varDsc->lvRefCnt + 1, lclNum);
        varDsc->lvRefCnt++;
    }

    // Turn the GT_FIELD into a reference to the local itself.
    tree->ChangeOper(GT_LCL_VAR);
    tree->gtLclVarCommon.SetLclNum(lclNum);
    tree->gtFlags &= GTF_NODE_MASK;

    GenTreePtr normedParent = fgWalkPre->parentStack->Index(1);
    if ((normedParent->gtOper == GT_ASG) && (normedParent->gtOp.gtOp1 == tree))
    {
        tree->gtFlags |= (GTF_VAR_DEF | GTF_DONT_CSE);
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("Replacing the field in normed struct with the local var:\n");
        fgWalkPre->printModified = true;
    }
#endif // DEBUG
    return WALK_SKIP_SUBTREES;
}

//------------------------------------------------------------------------
// fgMorphLocalField: Handle a GT_LCL_FLD that refers to a promoted struct local.
//
// Arguments:
//    tree      - the GT_LCL_FLD node to examine (asserted)
//    fgWalkPre - tree-walk data; its parentStack supplies the parent of 'tree'
//
// Return Value:
//    WALK_SKIP_SUBTREES when the local is a promoted struct (whether or not the
//    node could be retargeted), WALK_CONTINUE otherwise.
//
// Notes:
//    If a promoted field local of matching size exists at the accessed offset, the
//    node is retargeted to it (and becomes a GT_LCL_VAR when the field's type can
//    be enregistered).  Otherwise the struct local is marked do-not-enregister.

Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
    noway_assert(tree->OperGet() == GT_LCL_FLD);

    unsigned   lclNum = tree->gtLclFld.gtLclNum;
    LclVarDsc* varDsc = &lvaTable[lclNum];

    // Only promoted struct locals need any work.
    if (!varTypeIsStruct(varDsc) || !varDsc->lvPromoted)
    {
        return WALK_CONTINUE;
    }

    // Promoted struct
    unsigned   fieldOffs   = tree->gtLclFld.gtLclOffs;
    unsigned   fieldLclNum = 0;
    LclVarDsc* fieldDsc    = nullptr;
    bool       canUseField = false;

    if (fieldOffs != BAD_VAR_NUM)
    {
        fieldLclNum = lvaGetFieldLocal(varDsc, fieldOffs);
        noway_assert(fieldLclNum != BAD_VAR_NUM);
        fieldDsc = &lvaTable[fieldLclNum];

        // The field local is usable only if it has the same size as the access...
        canUseField = (genTypeSize(fieldDsc->TypeGet()) == genTypeSize(tree->gtType));
#ifdef _TARGET_X86_
        // ...and, on x86, the same floating-point-ness.
        canUseField = canUseField && (varTypeIsFloating(fieldDsc->TypeGet()) == varTypeIsFloating(tree->gtType));
#endif
    }

    if (!canUseField)
    {
        // No existing field local covers everything this access needs,
        // so the struct has to live in memory.
        lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));

#ifdef DEBUG
        // We can't convert this guy to a float because he really does have his
        // address taken..
        varDsc->lvKeepType = 1;
#endif // DEBUG

        return WALK_SKIP_SUBTREES;
    }

    // There is an existing sub-field we can use; point the node at it.
    tree->gtLclFld.SetLclNum(fieldLclNum);

    // The types must stay 'compatible'.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_ARM_
    assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
#else
    assert(varTypeIsIntegralOrI(tree->TypeGet()));
#endif

    if (varTypeCanReg(fieldDsc->TypeGet()))
    {
        // The field's type can be used as-is, so switch back to a plain GT_LCL_VAR.
        tree->ChangeOper(GT_LCL_VAR);
        assert(tree->gtLclVarCommon.gtLclNum == fieldLclNum);
        tree->gtType = fieldDsc->TypeGet();
#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
            fgWalkPre->printModified = true;
        }
#endif // DEBUG
    }

    // If the node is the destination of an assignment, flag it as a definition.
    GenTreePtr asgParent = fgWalkPre->parentStack->Index(1);
    if ((asgParent->gtOper == GT_ASG) && (asgParent->gtOp.gtOp1 == tree))
    {
        tree->gtFlags |= (GTF_VAR_DEF | GTF_DONT_CSE);
    }

    return WALK_SKIP_SUBTREES;
}

//------------------------------------------------------------------------
// fgMarkImplicitByRefArgs: Identify any by-value struct parameters which are "implicit by-reference";
//                          i.e. which the ABI requires to be passed by making a copy in the caller and
//                          passing its address to the callee.  Mark their `LclVarDsc`s such that
//                          `lvaIsImplicitByRefLocal` will return true for them.
//
// Notes:
//    Only has an effect on AMD64 (without FEATURE_UNIX_AMD64_STRUCT_PASSING) and ARM64; on every
//    other target the body is compiled out.  A struct parameter is marked when, on AMD64, its
//    size exceeds REGSIZE_BYTES or is not a power of two, or when, on ARM64, its size exceeds
//    TARGET_POINTER_SIZE and it is not a multireg struct.  Marking sets lvIsTemp on the
//    parameter and zeroes its lvRefCnt.

void Compiler::fgMarkImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
#ifdef DEBUG
    if (verbose)
    {
        // The banner names this function, matching the other phase banners in this file.
        printf("\n*************** In fgMarkImplicitByRefArgs()\n");
    }
#endif // DEBUG

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
        {
            size_t size;

            // For structs that lvSize() reports as larger than a register use that size;
            // otherwise ask the VM for the class size.
            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size                         = info.compCompHnd->getClassSize(typeHnd);
            }

#if defined(_TARGET_AMD64_)
            // Larger than a register, or not a power-of-two size.
            if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
#elif defined(_TARGET_ARM64_)
            // Larger than a pointer and not passed in multiple registers.
            if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
#endif
            {
                // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
                // So I am now using it to indicate that this is one of the weird implicit
                // by ref locals.
                // The address taken cleanup will look for references to locals marked like
                // this, and transform them appropriately.
                varDsc->lvIsTemp = 1;

                // Clear the ref count field; fgMarkAddressTakenLocals will increment it per
                // appearance of implicit-by-ref param so that call arg morphing can do an
                // optimization for single-use implicit-by-ref params whose single use is as
                // an outgoing call argument.
                varDsc->lvRefCnt = 0;
            }
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

//------------------------------------------------------------------------
// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
//                            struct to pointer).  Also choose (based on address-exposed analysis)
//                            which struct promotions of implicit byrefs to keep or discard.
//                            For those which are kept, insert the appropriate initialization code.
//                            For those which are to be discarded, annotate the promoted field locals
//                            so that fgMorphImplicitByRefArgs will know to rewrite their appearances
//                            using indirections off the pointer parameters.
//
// Notes:
//    Only has an effect on AMD64 (without FEATURE_UNIX_AMD64_STRUCT_PASSING) and ARM64.
//    The parameter's lvAddrExposed and lvDoNotEnregister bits are cleared in every build
//    flavor, so that DEBUG and non-DEBUG JITs leave the pointer parameter in the same state.

void Compiler::fgRetypeImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgRetypeImplicitByRefArgs()\n");
    }
#endif // DEBUG

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = &lvaTable[lclNum];

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            // Size of the struct; used below as the block size when initializing a
            // promotion temp from the parameter.
            size_t size;

            if (varDsc->lvSize() > REGSIZE_BYTES)
            {
                size = varDsc->lvSize();
            }
            else
            {
                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
                size                         = info.compCompHnd->getClassSize(typeHnd);
            }

            if (varDsc->lvPromoted)
            {
                // This implicit-by-ref was promoted; create a new temp to represent the
                // promoted struct before rewriting this parameter as a pointer.
                unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
                lvaSetStruct(newLclNum, lvaGetStruct(lclNum), true);
                // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
                varDsc = &lvaTable[lclNum];

                // Copy the struct promotion annotations to the new temp.
                LclVarDsc* newVarDsc       = &lvaTable[newLclNum];
                newVarDsc->lvPromoted      = true;
                newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
                newVarDsc->lvFieldCnt      = varDsc->lvFieldCnt;
                newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
                newVarDsc->lvCustomLayout  = varDsc->lvCustomLayout;
#ifdef DEBUG
                newVarDsc->lvKeepType = true;
#endif // DEBUG

                // Propagate address-taken-ness and do-not-enregister-ness.
                // (This must read the parameter's bits before they are cleared below.)
                newVarDsc->lvAddrExposed     = varDsc->lvAddrExposed;
                newVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
#ifdef DEBUG
                newVarDsc->lvLclBlockOpAddr   = varDsc->lvLclBlockOpAddr;
                newVarDsc->lvLclFieldExpr     = varDsc->lvLclFieldExpr;
                newVarDsc->lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
                newVarDsc->lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
                newVarDsc->lvLiveAcrossUCall  = varDsc->lvLiveAcrossUCall;
#endif // DEBUG

                // If the promotion is dependent, the promoted temp would just be committed
                // to memory anyway, so we'll rewrite its appearances to be indirections
                // through the pointer parameter, the same as we'd do for this
                // parameter if it weren't promoted at all (otherwise the initialization
                // of the new temp would just be a needless memcpy at method entry).
                // The promotion is likewise undone when the parameter's ref count does not
                // exceed its field count.
                bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
                                     (varDsc->lvRefCnt <= varDsc->lvFieldCnt);

                if (!undoPromotion)
                {
                    // Insert IR that initializes the temp from the parameter.
                    // LHS is a simple reference to the temp.
                    fgEnsureFirstBBisScratch();
                    GenTreePtr lhs = gtNewLclvNode(newLclNum, varDsc->lvType);
                    // RHS is an indirection (using GT_OBJ) off the parameter.
                    GenTreePtr addr   = gtNewLclvNode(lclNum, TYP_BYREF);
                    GenTreePtr rhs    = gtNewBlockVal(addr, (unsigned)size);
                    GenTreePtr assign = gtNewAssignNode(lhs, rhs);
                    fgInsertStmtAtBeg(fgFirstBB, assign);
                }

                // Update the locals corresponding to the promoted fields.
                unsigned fieldLclStart = varDsc->lvFieldLclStart;
                unsigned fieldCount    = varDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];

                    if (undoPromotion)
                    {
                        // Leave lvParentLcl pointing to the parameter so that fgMorphImplicitByRefArgs
                        // will know to rewrite appearances of this local.
                        assert(fieldVarDsc->lvParentLcl == lclNum);
                    }
                    else
                    {
                        // Set the new parent.
                        fieldVarDsc->lvParentLcl = newLclNum;
                        // Clear the ref count field; it is used to communicate the number of references
                        // to the implicit byref parameter when morphing calls that pass the implicit byref
                        // out as an outgoing argument value, but that doesn't pertain to this field local
                        // which is now a field of a non-arg local.
                        fieldVarDsc->lvRefCnt = 0;
                    }

                    fieldVarDsc->lvIsParam = false;
                    // The fields shouldn't inherit any register preferences from
                    // the parameter which is really a pointer to the struct.
                    fieldVarDsc->lvIsRegArg      = false;
                    fieldVarDsc->lvIsMultiRegArg = false;
                    fieldVarDsc->lvSetIsHfaRegArg(false);
                    fieldVarDsc->lvArgReg = REG_NA;
#if FEATURE_MULTIREG_ARGS
                    fieldVarDsc->lvOtherArgReg = REG_NA;
#endif
                    fieldVarDsc->lvPrefReg = 0;
                }

                if (undoPromotion)
                {
                    // Hijack lvFieldLclStart to record the new temp number.
                    // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
                    varDsc->lvFieldLclStart = newLclNum;
                }
                else
                {
                    // Unmark the parameter as promoted (it's a pointer now).
                    varDsc->lvPromoted      = false;
                    varDsc->lvFieldCnt      = 0;
                    varDsc->lvFieldLclStart = 0;
                }
            }

            // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
            varDsc->lvType = TYP_BYREF;

            // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
            // make sure that the following flag is not set as these will force SSA to
            // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
            //
            varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.

            // The struct parameter may have had its address taken, but the pointer parameter
            // cannot -- any uses of the struct parameter's address are uses of the pointer
            // parameter's value, and there's no way for the MSIL to reference the pointer
            // parameter's address.  So clear the address-taken bit for the parameter.
            // These bits are not DEBUG-only state, so they are cleared unconditionally;
            // doing it only under DEBUG would make release and DEBUG JITs disagree.
            varDsc->lvAddrExposed     = 0;
            varDsc->lvDoNotEnregister = 0;

#ifdef DEBUG
            // This should not be converted to a double in stress mode,
            // because it is really a pointer
            varDsc->lvKeepType = 1;

            if (verbose)
            {
                printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
            }
#endif // DEBUG
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

//------------------------------------------------------------------------
// fgMarkDemotedImplicitByRefArgs: Clean up after implicit byref parameters whose struct promotion
//                                 was requested but then discarded by fgRetypeImplicitByRefArgs.
//                                 By now fgMorphImplicitByRefArgs has rewritten their appearances
//                                 as indirections off the pointer parameter, so the promotion
//                                 annotations on the parameter, the promotion temp and the field
//                                 locals are reset here.
//
// Notes:
//    Only has an effect on AMD64 (without FEATURE_UNIX_AMD64_STRUCT_PASSING) and ARM64.

void Compiler::fgMarkDemotedImplicitByRefArgs()
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; ++lclNum)
    {
        LclVarDsc* argDsc = &lvaTable[lclNum];

        // Only implicit byrefs that are still flagged as promoted were demoted.
        if (!lvaIsImplicitByRefLocal(lclNum) || !argDsc->lvPromoted)
        {
            continue;
        }

        // lvFieldLclStart was borrowed to hold the number of the promotion temp.
        const unsigned tempLclNum = argDsc->lvFieldLclStart;

        // The parameter itself is no longer promoted.
        argDsc->lvPromoted      = false;
        argDsc->lvFieldCnt      = 0;
        argDsc->lvFieldLclStart = 0;

        // Its ref count was only set (during address-taken analysis) so that call
        // morphing could spot single-use implicit byrefs.  That is finished, and the
        // count should be back at its default of zero before the real ref counts
        // for all variables are computed.
        argDsc->lvRefCnt = 0;

        // Nothing uses the temp struct any more; flag it so that no stack space
        // gets allocated for it.
        LclVarDsc* tempDsc     = &lvaTable[tempLclNum];
        tempDsc->lvRefCnt      = 0;
        tempDsc->lvAddrExposed = false;
#ifdef DEBUG
        tempDsc->lvUnusedStruct = true;
#endif // DEBUG

        const unsigned firstFieldLcl = tempDsc->lvFieldLclStart;
        const unsigned numFields     = tempDsc->lvFieldCnt;
        const unsigned endFieldLcl   = firstFieldLcl + numFields;

        for (unsigned fieldLcl = firstFieldLcl; fieldLcl < endFieldLcl; ++fieldLcl)
        {
            LclVarDsc* fieldDsc = &lvaTable[fieldLcl];

            // Re-parent the field local from the parameter to the temp struct.
            assert(fieldDsc->lvParentLcl == lclNum);
            fieldDsc->lvParentLcl = tempLclNum;

            // The field local is unused as well; make sure it gets no stack space.
            fieldDsc->lvRefCnt      = 0;
            fieldDsc->lvAddrExposed = false;
        }
    }

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

/*****************************************************************************
 *
 *  Morph irregular parameters
 *    for x64 and ARM64 this means turning them into byrefs, adding extra indirs.
 *
 *  Examines 'tree' itself (when it is a GT_ADDR of a local) or each of its
 *  direct operands (otherwise) and rewrites references to implicit byref
 *  parameters.  Returns true if anything was rewritten; always returns false
 *  on targets without implicit byref parameters.
 */
bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree)
{
#if (defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)) || defined(_TARGET_ARM64_)

    bool anyRewritten = false;

    // The rewrite depends on whether the parameter reference sits directly under a
    // GT_ADDR, so this method works one level down: it inspects the operands of
    // 'tree' and rewrites those that reference an implicit byref parameter.
    if (tree->gtOper == GT_ADDR)
    {
        if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
        {
            // The ADDR node is rewritten in place, so a non-null result must be 'tree'.
            GenTreePtr rewritten = fgMorphImplicitByRefArgs(tree, true);
            anyRewritten         = (rewritten != nullptr);
            assert(!anyRewritten || (rewritten == tree));
        }

        return anyRewritten;
    }

    for (GenTreePtr* useEdge : tree->UseEdges())
    {
        GenTreePtr operand = *useEdge;
        if (operand->gtOper != GT_LCL_VAR)
        {
            continue;
        }

        // A value use may be replaced by a new node; hook it into the use edge.
        GenTreePtr replacement = fgMorphImplicitByRefArgs(operand, false);
        if (replacement != nullptr)
        {
            *useEdge     = replacement;
            anyRewritten = true;
        }
    }

    return anyRewritten;

#else // !((_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_)

    return false;

#endif // (_TARGET_AMD64_ && !FEATURE_UNIX_AMD64_STRUCT_PASSING) || _TARGET_ARM64_
}

//------------------------------------------------------------------------
// fgMorphImplicitByRefArgs: Rewrite one reference to an implicit byref parameter, or to a
//                           promoted field local whose parent is such a parameter.
//
// Arguments:
//    tree   - a GT_LCL_VAR, or a GT_ADDR whose operand is a GT_LCL_VAR (asserted)
//    isAddr - true exactly when 'tree' is the GT_ADDR form (asserted)
//
// Return Value:
//    nullptr when no rewrite was needed; otherwise the rewritten tree.  For the GT_ADDR
//    form that is 'tree' itself, modified in place; for the value form it is a newly
//    created GT_OBJ or GT_FIELD node wrapping 'tree'.

GenTreePtr Compiler::fgMorphImplicitByRefArgs(GenTreePtr tree, bool isAddr)
{
    assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
    assert(isAddr == (tree->gtOper == GT_ADDR));

    // Locate the GT_LCL_VAR node and its descriptor.
    GenTreePtr lclNode = tree;
    if (isAddr)
    {
        lclNode = tree->gtOp.gtOp1;
    }

    unsigned   lclNum = lclNode->gtLclVarCommon.gtLclNum;
    LclVarDsc* lclDsc = &lvaTable[lclNum];

    // These stay at their defaults unless the reference is to a promoted field local.
    CORINFO_FIELD_HANDLE fieldHnd     = nullptr;
    unsigned             fieldOffset  = 0;
    var_types            fieldRefType = TYP_UNKNOWN;

    if (lvaIsImplicitByRefLocal(lclNum))
    {
        // The SIMD transformation that coalesces contiguous references to SIMD vector
        // fields re-runs the address-taken traversal, so we can be handed a tree that
        // was already retyped to TYP_BYREF.  Such a tree is left alone.
        if (!varTypeIsStruct(lclNode))
        {
            assert(lclNode->TypeGet() == TYP_BYREF);
            return nullptr;
        }
    }
    else if (lclDsc->lvIsStructField && lvaIsImplicitByRefLocal(lclDsc->lvParentLcl))
    {
        // A field local of a dependently promoted implicit byref struct parameter:
        // it becomes a field access off the pointer.  Take the field's identity from
        // the field local...
        fieldHnd    = lclDsc->lvFieldHnd;
        fieldOffset = lclDsc->lvFldOffset;
        assert(fieldHnd != nullptr);
        // ...and from here on let lclNum/lclDsc describe the parameter.
        lclNum       = lclDsc->lvParentLcl;
        lclDsc       = &lvaTable[lclNum];
        fieldRefType = lclNode->TypeGet();
    }
    else
    {
        // Not one of the 'marked' implicit by ref parameters; nothing to transform.
        return nullptr;
    }

    // Even if this was a def of the struct, it is not a def of the lclVar any more.
    lclNode->gtFlags &= ~(GTF_LIVENESS_MASK);

    if (isAddr)
    {
        if (fieldHnd != nullptr)
        {
            // &(X.f) [GT_ADDR of the local for a promoted arg field] becomes
            // &(X, f) [GT_ADDR of a GT_FIELD off the pointer param].
            lclNode->gtLclVarCommon.SetLclNum(lclNum);
            lclNode->gtType  = TYP_BYREF;
            tree->gtOp.gtOp1 = gtNewFieldRef(fieldRefType, fieldHnd, lclNode, fieldOffset);
        }
        else
        {
            // &X becomes just plain X.
            tree->CopyFrom(lclNode, this);
            tree->gtType = TYP_BYREF;
        }

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing address of implicit by ref struct parameter with byref:\n");
        }
#endif // DEBUG
    }
    else
    {
        // X becomes OBJ(X) or FIELD(X, f); the local node itself is now the byref.
        var_types originalType = tree->gtType;
        tree->gtType           = TYP_BYREF;

        if (fieldHnd != nullptr)
        {
            tree->gtLclVarCommon.SetLclNum(lclNum);
            tree = gtNewFieldRef(fieldRefType, fieldHnd, tree, fieldOffset);
        }
        else
        {
            tree = gtNewObjNode(lclDsc->lvVerTypeInfo.GetClassHandle(), tree);
        }

        if (originalType == TYP_STRUCT)
        {
            gtSetObjGcInfo(tree->AsObj());
        }

        // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
        // we could remove TGTANYWHERE
        tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);

#ifdef DEBUG
        if (verbose)
        {
            printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
        }
#endif // DEBUG
    }

#ifdef DEBUG
    if (verbose)
    {
        gtDispTree(tree);
    }
#endif // DEBUG

    return tree;
}

// "AddrExposedContext" describes the context in which an address expression is being
// evaluated while walking a tree to find address-exposed locals.
enum AddrExposedContext
{
    // None of the contexts below has been seen yet.
    AXC_None,

    // The address being computed is to be dereferenced.
    AXC_Ind,

    // A raw address is being computed (not dereferenced, at least not immediately).
    AXC_Addr,

    // A block operation dereferenced an address referencing more bytes than the address
    // addresses -- if the address addresses a field of a struct local, the entire local
    // (not just the field) must be considered address taken.
    AXC_IndWide,

    // The address being computed will be dereferenced by a block operation that operates
    // on more bytes than the width of the storage location addressed.  If this is a
    // field of a promoted struct local, declare the entire struct local address-taken.
    AXC_AddrWide,

    // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
    // If one arg is a constant int, evaluate the other in an IND context.  Otherwise, none.
    AXC_IndAdd,
};

// Stack of address-exposure contexts threaded through the tree walk callbacks.
using AXCStack = ArrayStack<AddrExposedContext>;

// fgMarkAddrTakenLocalsPostCB: post-order half of the address-exposure walk.
//
// The pre/post callback pair simulates passing an argument down a recursion by means of an
// explicit stack: the pre-order callback pushes one context per node, and this callback
// discards that context again.
//
// Arguments:
//    pTree     - the node being left (not inspected here)
//    fgWalkPre - walk data; pCallbackData is the AXCStack in use
//
// Return Value:
//    Always WALK_CONTINUE.
//
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
{
    AXCStack* const contexts = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);

    // The popped value itself is of no interest.
    (void)contexts->Pop();

    return WALK_CONTINUE;
}

// fgMarkAddrTakenLocalsPreCB: pre-order half of the address-exposure walk.
//
// Reads the context currently on top of the AXCStack (passed in fgWalkPre->pCallbackData),
// uses it together with the node's operator to decide which context the node's operands
// should be evaluated in, and pushes exactly one context on every path.  The matching
// post-order callback pops that entry.  When a local is reached in an AXC_Addr or
// AXC_AddrWide context it is marked address-exposed via lvaSetVarAddrExposed; in the
// AXC_AddrWide case the parent struct local of a struct field local is marked as well.
//
// Arguments:
//    pTree     - pointer to the node being visited
//    fgWalkPre - walk data; supplies the compiler instance, the parent stack and the AXCStack
//
// Return Value:
//    WALK_SKIP_SUBTREES for GT_LCL_VAR nodes and for GT_FIELD nodes for which
//    fgMorphStructField returned WALK_SKIP_SUBTREES; the result of fgMorphLocalField for
//    GT_LCL_FLD nodes; WALK_CONTINUE otherwise.
//
Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
{
    GenTreePtr         tree     = *pTree;
    Compiler*          comp     = fgWalkPre->compiler;
    AXCStack*          axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
    AddrExposedContext axc      = axcStack->Top();

    // In some situations, we have to figure out what the effective context is in which to
    // evaluate the current tree, depending on which argument position it is in its parent.

    switch (axc)
    {

        case AXC_IndAdd:
        {
            // AXC_IndAdd is only pushed by the GT_ADD case below, so the parent must be a GT_ADD.
            GenTreePtr parent = fgWalkPre->parentStack->Index(1);
            assert(parent->OperGet() == GT_ADD);
            // Is one of the args a constant representing a field offset,
            // and is this the other?  If so, Ind context.
            if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
            {
                axc = AXC_Ind;
            }
            else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
            {
                axc = AXC_Ind;
            }
            else
            {
                axc = AXC_None;
            }
        }
        break;

        default:
            break;
    }

    // Now recurse properly for the tree.
    switch (tree->gtOper)
    {
        case GT_IND:
            // An indirection of an address (IND under ADDR) cancels out; otherwise the operand
            // is an address that gets dereferenced.
            if (axc != AXC_Addr)
            {
                axcStack->Push(AXC_Ind);
            }
            else
            {
                axcStack->Push(AXC_None);
            }
            return WALK_CONTINUE;

        case GT_BLK:
        case GT_OBJ:
            if (axc == AXC_Addr)
            {
                axcStack->Push(AXC_None);
            }
            else if (tree->TypeGet() == TYP_STRUCT)
            {
                // The block operation will dereference its argument(s) -- usually.  If the size of the initblk
                // or copyblk exceeds the size of a storage location whose address is used as one of the
                // arguments, then we have to consider that storage location (indeed, its underlying containing
                // location) to be address taken.  So get the width of the initblk or copyblk.

                GenTreePtr  parent = fgWalkPre->parentStack->Index(1);
                GenTreeBlk* blk    = tree->AsBlk();
                unsigned    width  = blk->gtBlkSize;
                noway_assert(width != 0);
                axc           = AXC_Ind;
                GenTree* addr = blk->Addr();
                if (addr->OperGet() == GT_ADDR)
                {
                    if (parent->gtOper == GT_ASG)
                    {
                        // Only the destination (op1) of the assignment is checked against the width.
                        // NOTE(review): the 'width == 0' test looks redundant given the noway_assert
                        // above -- confirm before removing.
                        if ((tree == parent->gtOp.gtOp1) &&
                            ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
                        {
                            axc = AXC_IndWide;
                        }
                    }
                    else
                    {
                        assert(parent->gtOper == GT_CALL);
                    }
                }
                axcStack->Push(axc);
            }
            else
            {
                // This is like a regular GT_IND.
                axcStack->Push(AXC_Ind);
            }
            return WALK_CONTINUE;

        case GT_DYN_BLK:
            // Assume maximal width.
            axcStack->Push(AXC_IndWide);
            return WALK_CONTINUE;

        case GT_LIST:
        case GT_FIELD_LIST:
            // List nodes reset the context for their elements.
            axcStack->Push(AXC_None);
            return WALK_CONTINUE;

        case GT_INDEX:
            // Taking the address of an array element never takes the address of a local.
            axcStack->Push(AXC_None);
            return WALK_CONTINUE;

        case GT_ADDR:
#ifdef FEATURE_SIMD
            if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
            {
                axcStack->Push(AXC_None);
            }
            else
#endif // FEATURE_SIMD
                if (axc == AXC_Ind)
            {
                // IND(ADDR(x)): the address is consumed immediately, so nothing escapes.
                axcStack->Push(AXC_None);
            }
            else if (axc == AXC_IndWide)
            {
                // The address is consumed by a wider access; tell the operand about it.
                axcStack->Push(AXC_AddrWide);
            }
            else
            {
                assert(axc == AXC_None);
                axcStack->Push(AXC_Addr);
            }
            return WALK_CONTINUE;

        case GT_FIELD:
            // First, handle a couple of special cases: field of promoted struct local, field
            // of "normed" struct.
            if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
            {
                // It (may have) replaced the field with a local var or local field.  If we're in an addr context,
                // label it addr-taken.
                if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
                {
                    unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
                    comp->lvaSetVarAddrExposed(lclNum);
                    if (axc == AXC_AddrWide)
                    {
                        // A wide access through a field's address exposes the parent struct too.
                        LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                        if (varDsc->lvIsStructField)
                        {
                            comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                        }
                    }
                }
                // Push something to keep the PostCB, which will pop it, happy.
                axcStack->Push(AXC_None);
                return WALK_SKIP_SUBTREES;
            }
            else
            {
                // GT_FIELD is an implicit deref.
                if (axc == AXC_Addr)
                {
                    axcStack->Push(AXC_None);
                }
                else if (axc == AXC_AddrWide)
                {
                    axcStack->Push(AXC_IndWide);
                }
                else
                {
                    axcStack->Push(AXC_Ind);
                }
                return WALK_CONTINUE;
            }

        case GT_LCL_FLD:
        {
            assert(axc != AXC_Addr);
            // Note that lclNum is captured before fgMorphLocalField gets a chance to rewrite the node.
            unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
            if (comp->lvaIsImplicitByRefLocal(lclNum))
            {
                // Keep track of the number of appearances of each promoted implicit
                // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
                // checks the ref counts for implicit byref params when deciding if it's legal
                // to elide certain copies of them.
                // NOTE(review): the dump text below names fgMorphStructField although it is
                // emitted from this callback -- confirm whether that is intentional.
                LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
                        varDsc->lvRefCnt + 1, lclNum);

                varDsc->lvRefCnt++;
            }
            // This recognizes certain forms, and does all the work.  In that case, returns WALK_SKIP_SUBTREES,
            // else WALK_CONTINUE.  We do the same here.
            fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
            if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
            {
                comp->lvaSetVarAddrExposed(lclNum);
                if (axc == AXC_AddrWide)
                {
                    LclVarDsc* varDsc = &comp->lvaTable[lclNum];
                    if (varDsc->lvIsStructField)
                    {
                        comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                    }
                }
            }
            // Must push something; if res is WALK_SKIP_SUBTREES, doesn't matter
            // what, but something to be popped by the post callback.  If we're going
            // to analyze children, the LCL_FLD creates an Ind context, so use that.
            axcStack->Push(AXC_Ind);
            return res;
        }

        case GT_LCL_VAR:
        {
            unsigned   lclNum = tree->gtLclVarCommon.gtLclNum;
            LclVarDsc* varDsc = &comp->lvaTable[lclNum];

            if (comp->lvaIsImplicitByRefLocal(lclNum))
            {
                // Keep track of the number of appearances of each promoted implicit
                // byref (here during address-exposed analysis); fgMakeOutgoingStructArgCopy
                // checks the ref counts for implicit byref params when deciding if it's legal
                // to elide certain copies of them.
                // NOTE(review): the dump text below names fgMorphStructField although it is
                // emitted from this callback -- confirm whether that is intentional.
                JITDUMP("Incrementing ref count from %d to %d for V%02d in fgMorphStructField\n", varDsc->lvRefCnt,
                        varDsc->lvRefCnt + 1, lclNum);

                varDsc->lvRefCnt++;
            }

            if (axc == AXC_Addr || axc == AXC_AddrWide)
            {
                comp->lvaSetVarAddrExposed(lclNum);
                if (axc == AXC_AddrWide)
                {
                    // A wide access through a field local's address exposes the parent struct too.
                    if (varDsc->lvIsStructField)
                    {
                        comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
                    }
                }

                // We may need to Quirk the storage size for this LCL_VAR
                // some PInvoke signatures incorrectly specify a ByRef to an INT32
                // when they actually write a SIZE_T or INT64
                if (axc == AXC_Addr)
                {
                    comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
                }
            }
            // Push something to keep the PostCB, which will pop it, happy.
            axcStack->Push(AXC_None);
            // The tree is a leaf.
            return WALK_SKIP_SUBTREES;
        }

        case GT_ADD:
            assert(axc != AXC_Addr);
            // See below about treating pointer operations as wider indirection.
            if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
            {
                axcStack->Push(AXC_IndWide);
            }
            else if (axc == AXC_Ind)
            {
                // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
                // If it's an add of a constant and an address, and the constant represents a field,
                // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
                axcStack->Push(AXC_IndAdd);
            }
            else
            {
                // Any other context is handed to the operands unchanged.
                axcStack->Push(axc);
            }
            return WALK_CONTINUE;

        // !!! Treat Pointer Operations as Wider Indirection
        //
        // If we are performing pointer operations, make sure we treat that as equivalent to a wider
        // indirection. This is because the pointers could be pointing to the address of struct fields
        // and could be used to perform operations on the whole struct or passed to another method.
        //
        // When visiting a node in this pre-order walk, we do not know if we would in the future
        // encounter a GT_ADDR of a GT_FIELD below.
        //
        // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
        // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
        // wider indirection context down the expr tree.
        //
        // Example, in unsafe code,
        //
        //   IL_000e  12 00             ldloca.s     0x0
        //   IL_0010  7c 02 00 00 04    ldflda       0x4000002
        //   IL_0015  12 00             ldloca.s     0x0
        //   IL_0017  7c 01 00 00 04    ldflda       0x4000001
        //   IL_001c  59                sub
        //
        // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
        // consider GT_SUB to be equivalent of an AXC_IndWide.
        //
        // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
        // them as AXC_IndWide.
        //

        // BINOP
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:
        case GT_UDIV:
        case GT_OR:
        case GT_XOR:
        case GT_AND:
        case GT_LSH:
        case GT_RSH:
        case GT_RSZ:
        case GT_ROL:
        case GT_ROR:
        case GT_EQ:
        case GT_NE:
        case GT_LT:
        case GT_LE:
        case GT_GT:
        case GT_GE:
        // UNOP
        case GT_CAST:
            // gtOp2 is only inspected for binary operators (GT_CAST has a single operand).
            if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
                (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
            {
                axcStack->Push(AXC_IndWide);
                return WALK_CONTINUE;
            }
            // No byref operand: handle like any other operator.
            __fallthrough;

        default:
            // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None".  We must
            // handle the "Ind" propagation explicitly above.
            if (axc == AXC_Addr || axc == AXC_AddrWide)
            {
                axcStack->Push(axc);
            }
            else
            {
                axcStack->Push(AXC_None);
            }
            return WALK_CONTINUE;
    }
}

// fgFitsInOrNotLoc: check whether an access of 'width' bytes stays within the storage
// described by 'tree'.
//
// Arguments:
//    tree  - the node describing the accessed location
//    width - size of the access, in bytes
//
// Return Value:
//    For a non-struct node, whether 'width' does not exceed the size of the node's type.
//    For a struct-typed GT_LCL_VAR, GT_FIELD or GT_INDEX, whether 'width' does not exceed
//    the local's exact size, the size of the class returned by getFieldClass for the field
//    handle, or the array element size, respectively.  Any other struct-typed node yields
//    false.
//
bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
{
    const var_types treeType = tree->TypeGet();

    // Primitive (non-struct) locations are sized by their type alone.
    if (treeType != TYP_STRUCT)
    {
        return width <= genTypeSize(treeType);
    }

    switch (tree->OperGet())
    {
        case GT_LCL_VAR:
        {
            const unsigned varNum = tree->gtLclVarCommon.gtLclNum;
            return width <= lvaTable[varNum].lvExactSize;
        }

        case GT_FIELD:
        {
            CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
            return width <= info.compCompHnd->getClassSize(clsHnd);
        }

        case GT_INDEX:
            return width <= tree->gtIndex.gtIndElemSize;

        default:
            // Unknown shape: conservatively report that the access does not fit.
            return false;
    }
}

// fgAddFieldSeqForZeroOffset: attach a field sequence to an address expression.
//
// Arguments:
//    op1      - the address node; must be of type TYP_BYREF, TYP_I_IMPL or TYP_REF
//    fieldSeq - the field sequence to append/record
//
// Notes:
//    - GT_ADDR of a GT_LCL_FLD: the sequence is appended to the local field's sequence.
//    - GT_ADD with a GT_CNS_INT operand (op1 is checked first, then op2), or a GT_CNS_INT
//      itself: the sequence is appended to the constant's sequence, but only if the
//      constant already carries a non-null sequence.
//    - GT_ADDR of anything else, or GT_ADD without a constant operand: nothing is recorded.
//    - Any other operator: the sequence is recorded in the zero-offset field map.
//
void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
{
    assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);

    const genTreeOps addrOper = op1->OperGet();

    if (addrOper == GT_ADDR)
    {
        GenTreePtr location = op1->gtOp.gtOp1;
        if (location->OperGet() == GT_LCL_FLD)
        {
            GenTreeLclFld* lclFldNode = location->AsLclFld();
            lclFldNode->gtFieldSeq    = GetFieldSeqStore()->Append(lclFldNode->gtFieldSeq, fieldSeq);
        }
        return;
    }

    // Find the integer constant (if any) whose field sequence should be extended.
    GenTreePtr constNode = nullptr;

    if (addrOper == GT_ADD)
    {
        if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
        {
            constNode = op1->gtOp.gtOp1;
        }
        else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
        {
            constNode = op1->gtOp.gtOp2;
        }
    }
    else if (addrOper == GT_CNS_INT)
    {
        constNode = op1;
    }
    else
    {
        // Record in the general zero-offset map.
        GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
        return;
    }

    if (constNode != nullptr)
    {
        FieldSeqNode* existingSeq = constNode->gtIntCon.gtFieldSeq;

        // Constants without a field sequence are left untouched.
        if (existingSeq != nullptr)
        {
            constNode->gtIntCon.gtFieldSeq = GetFieldSeqStore()->Append(existingSeq, fieldSeq);
        }
    }
}

/*****************************************************************************
 *
 *  Mark address-taken locals.
 */

void Compiler::fgMarkAddressExposedLocals()
{
#ifdef DEBUG
    if (verbose)
    {
        printf("\n*************** In fgMarkAddressExposedLocals()\n");
    }
#endif // DEBUG

    BasicBlock* block = fgFirstBB;
    noway_assert(block);

    do
    {
        /* Make the current basic block address available globally */

        compCurBB = block;

        GenTreePtr stmt;

        for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
        {
            // Call Compiler::fgMarkAddrTakenLocalsCB on each node
            AXCStack stk(this);
            stk.Push(AXC_None); // We start in neither an addr or ind context.
            fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
        }

        block = block->bbNext;

    } while (block);
}

// fgNodesMayInterfere:
//   return true if moving nodes relative to each other can change the result of a computation
//
// args:
//   write: a node which writes
//   read:  a node which reads
//
// notes:
//   - an indirect write interferes with any indirect read and with a read of an
//     address-exposed local;
//   - a write to a local interferes with an indirect read if that local is address-exposed,
//     and with a local read of the same local number;
//   - everything else is reported as non-interfering.
//

bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
{
    const bool readIsLocal   = read->OperIsLocal();
    const bool readViaIndir  = read->OperIsIndir() || read->OperIsImplicitIndir();
    const bool writeViaIndir = write->OperIsIndir() || write->OperIsImplicitIndir();

    if (writeViaIndir)
    {
        if (readViaIndir)
        {
            return true;
        }

        if (!readIsLocal)
        {
            return false;
        }

        // A store through a pointer can only reach a local whose address has escaped.
        LclVarDsc* readVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
        return readVar->lvAddrExposed;
    }

    if (!write->OperIsLocal())
    {
        return false;
    }

    const unsigned writeLclNum = write->gtLclVarCommon.gtLclNum;

    if (readViaIndir)
    {
        // A load through a pointer can only observe a local whose address has escaped.
        LclVarDsc* writeVar = &lvaTable[writeLclNum];
        return writeVar->lvAddrExposed;
    }

    // Two direct local accesses interfere only when they name the same local.
    return readIsLocal && (read->gtLclVarCommon.gtLclNum == writeLclNum);
}

//------------------------------------------------------------------------
// fgShouldCreateAssignOp: decide whether "x = x <op> y" (x being an arbitrary expression)
// should be folded into "x <op>= y".
//
// Arguments:
//    tree     - the assignment node; op1 is the destination, op2 the "<op>" expression
//    bReverse - [out] written only when true is returned: false if the destination matched
//               the first operand of "<op>", true if it matched the second operand
//               ("x = y <op> x" with a commutative operator), in which case the caller
//               has to reverse the operands
//
// Return Value:
//    true if the fold should be performed.  Always false on load/store architectures
//    (no gain there) and whenever LEGACY_BACKEND is not defined.
//
bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
{
#if CPU_LOAD_STORE_ARCH || !defined(LEGACY_BACKEND)
    // Either a load/store architecture, where nothing is gained by doing any of this,
    // or a non-legacy backend: bail.
    return false;
#else  // !CPU_LOAD_STORE_ARCH && defined(LEGACY_BACKEND)

    GenTreePtr       dst    = tree->gtOp.gtOp1;
    GenTreePtr       rhs    = tree->gtGetOp2();
    const genTreeOps rhsOps = rhs->OperGet();

    // Is the destination identical to the first RHS sub-operand?
    if (GenTree::Compare(dst, rhs->gtOp.gtOp1))
    {
        // Do not transform
        //
        //     const     int    1
        //     |         int
        //     lclVar    ubyte  V01 tmp0
        //     =         ubyte
        //     lclVar    ubyte  V01 tmp0
        //
        // into
        //
        //     const     int    1
        //     |=        ubyte
        //     lclVar    ubyte  V01 tmp0
        //
        // when V01 is a struct field local, i.e. when a small-typed local destination
        // differs in type from the second operand of the RHS.
        const bool smallLclWithTypeMismatch = (dst->gtOper == GT_LCL_VAR) && varTypeIsSmall(dst->TypeGet()) &&
                                              (dst->TypeGet() != rhs->gtOp.gtOp2->TypeGet());

        if (smallLclWithTypeMismatch && lvaTable[dst->gtLclVarCommon.gtLclNum].lvIsStructField)
        {
            return false;
        }

        *bReverse = false;
        return true;
    }

    // From here on only "a = x <op> a" remains, which is valid for commutative ops only.
    if (!GenTree::OperIsCommutative(rhsOps))
    {
        return false;
    }

    // Should we be doing this at all?
    if ((opts.compFlags & CLFLG_TREETRANS) == 0)
    {
        return false;
    }

    // Can we swap the operands of the RHS operator?  Only if at most one side has side effects.
    const bool firstHasEffects  = (rhs->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) != 0;
    const bool secondHasEffects = firstHasEffects && ((rhs->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) != 0);
    if (secondHasEffects)
    {
        return false;
    }

    // Is the destination identical to the second RHS sub-operand?
    if (!GenTree::Compare(dst, rhs->gtOp.gtOp2))
    {
        return false;
    }

    *bReverse = true;
    return true;
#endif // !CPU_LOAD_STORE_ARCH && defined(LEGACY_BACKEND)
}

#ifdef FEATURE_SIMD

//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
//  If the RHS of the input stmt is a read of field X (index 0) of a SIMD vector of floats,
//  then this function keeps reading the next few stmts based on the vector size (2, 3, 4).
//  If the LHSs of those stmts are located contiguously and their RHSs are also located
//  contiguously, then the statements are replaced with a single copyblk.
//
// Arguments:
//  block - BasicBlock*. block which stmt belongs to
//  stmt  - GenTreeStmt*. the stmt node we want to check; its expression must be a GT_ASG
//
// Return Value:
//  true if this function successfully optimized the stmts; otherwise false, in which case
//  no statement has been modified or removed.
//
// Notes:
//  On success the statements following 'stmt' that took part in the pattern are removed
//  from 'block', the expression of 'stmt' is replaced by the block copy, and the new
//  expression is re-walked with the address-exposure callbacks because it contains an
//  address node that did not exist before.

bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
{

    noway_assert(stmt->gtOper == GT_STMT);
    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
    assert(tree->OperGet() == GT_ASG);

    GenTreePtr originalLHS    = tree->gtOp.gtOp1;
    GenTreePtr prevLHS        = tree->gtOp.gtOp1;
    GenTreePtr prevRHS        = tree->gtOp.gtOp2;
    unsigned   index          = 0;
    var_types  baseType       = TYP_UNKNOWN;
    unsigned   simdSize       = 0;
    GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);

    if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
    {
        // if the RHS is not from a SIMD vector field X, then there is no need to check further.
        return false;
    }

    // The first assignment is 'stmt' itself, so one fewer than the element count must follow.
    var_types  simdType             = getSIMDTypeForSize(simdSize);
    int        assignmentsCount     = simdSize / genTypeSize(baseType) - 1;
    int        remainingAssignments = assignmentsCount;
    GenTreePtr curStmt              = stmt->gtNext;
    GenTreePtr lastStmt             = stmt;

    while (curStmt != nullptr && remainingAssignments > 0)
    {
        GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
        if (exp->OperGet() != GT_ASG)
        {
            break;
        }
        GenTreePtr curLHS = exp->gtGetOp1();
        GenTreePtr curRHS = exp->gtGetOp2();

        // Both sides must continue exactly where the previous assignment left off.
        if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
        {
            break;
        }

        remainingAssignments--;
        prevLHS = curLHS;
        prevRHS = curRHS;

        lastStmt = curStmt;
        curStmt  = curStmt->gtNext;
    }

    if (remainingAssignments > 0)
    {
        // if the number of assignments left is bigger than zero, then this means
        // that the assignments are not assigning to contiguous memory
        // locations from the same vector.
        return false;
    }
#ifdef DEBUG
    if (verbose)
    {
        printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
        printf("From BB%02u, stmt", block->bbNum);
        printTreeID(stmt);
        printf(" to stmt");
        printTreeID(lastStmt);
        printf("\n");
    }
#endif

    // Drop the follow-up assignments; 'stmt' is kept and rewritten below.
    for (int i = 0; i < assignmentsCount; i++)
    {
        fgRemoveStmt(block, stmt->gtNext);
    }

    GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
    if (simdStructNode->OperIsLocal())
    {
        setLclRelatedToSIMDIntrinsic(simdStructNode);
    }
    GenTree* copyBlkAddr = copyBlkDst;
    if (copyBlkAddr->gtOper == GT_LEA)
    {
        copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
    }
    GenTreeLclVarCommon* localDst = nullptr;
    if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
    {
        setLclRelatedToSIMDIntrinsic(localDst);
    }

    if (simdStructNode->TypeGet() == TYP_BYREF)
    {
        // The source is an implicit byref parameter: read the vector through the pointer.
        assert(simdStructNode->OperIsLocal());
        assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
        simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
    }
    else
    {
        assert(varTypeIsSIMD(simdStructNode));
    }

#ifdef DEBUG
    if (verbose)
    {
        printf("\nBB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(before)\n");
        gtDispTree(stmt);
    }
#endif

    // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
    GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
    blkNode->gtType  = simdType;
    tree             = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
                          false, // not volatile
                          true); // copyBlock

    stmt->gtStmt.gtStmtExpr = tree;

    // Since we generated a new address node which didn't exist before,
    // we should expose this address manually here.
    AXCStack stk(this);
    stk.Push(AXC_None);
    fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);

#ifdef DEBUG
    if (verbose)
    {
        printf("\nReplaced BB%02u stmt", block->bbNum);
        printTreeID(stmt);
        printf("(after)\n");
        gtDispTree(stmt);
    }
#endif
    return true;
}

#endif // FEATURE_SIMD
